summaryrefslogtreecommitdiff
path: root/drivers/cxl
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-02-25 09:19:23 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2023-02-25 09:19:23 -0800
commit7c3dc440b1f5c75f45e24430f913e561dc82a419 (patch)
tree7c3e037691d712a20f7e95279cc1e090328d5d44 /drivers/cxl
parentd8e473182ab9e85708067be81d20424045d939fa (diff)
parente686c32590f40bffc45f105c04c836ffad3e531a (diff)
downloadlwn-7c3dc440b1f5c75f45e24430f913e561dc82a419.tar.gz
lwn-7c3dc440b1f5c75f45e24430f913e561dc82a419.zip
Merge tag 'cxl-for-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl
Pull Compute Express Link (CXL) updates from Dan Williams: "To date Linux has been dependent on platform-firmware to map CXL RAM regions and handle events / errors from devices. With this update we can now parse / update the CXL memory layout, and report events / errors from devices. This is a precursor for the CXL subsystem to handle the end-to-end "RAS" flow for CXL memory. i.e. the flow that for DDR-attached-DRAM is handled by the EDAC driver where it maps system physical address events to a field-replaceable-unit (FRU / endpoint device). In general, CXL has the potential to standardize what has historically been a pile of memory-controller-specific error handling logic. Another change of note is the default policy for handling RAM-backed device-dax instances. Previously the default access mode was "device", mmap(2) a device special file to access memory. The new default is "kmem" where the address range is assigned to the core-mm via add_memory_driver_managed(). This saves typical users from wondering why their platform memory is not visible via free(1) and stuck behind a device-file. At the same time it allows expert users to deploy policy to, for example, get dedicated access to high performance memory, or hide low performance memory from general purpose kernel allocations. This affects not only CXL, but also systems with high-bandwidth-memory that platform-firmware tags with the EFI_MEMORY_SP (special purpose) designation. Summary: - CXL RAM region enumeration: instantiate 'struct cxl_region' objects for platform firmware created memory regions - CXL RAM region provisioning: complement the existing PMEM region creation support with RAM region support - "Soft Reservation" policy change: Online (memory hot-add) soft-reserved memory (EFI_MEMORY_SP) by default, but still allow for setting aside such memory for dedicated access via device-dax. - CXL Events and Interrupts: Takeover CXL event handling from platform-firmware (ACPI calls this CXL Memory Error Reporting) and export CXL Events via Linux Trace Events. - Convey CXL _OSC results to drivers: Similar to PCI, let the CXL subsystem interrogate the result of CXL _OSC negotiation. - Emulate CXL DVSEC Range Registers as "decoders": Allow for first-generation devices that pre-date the definition of the CXL HDM Decoder Capability to translate the CXL DVSEC Range Registers into 'struct cxl_decoder' objects. - Set timestamp: Per spec, set the device timestamp in case of hotplug, or if platform-firwmare failed to set it. - General fixups: linux-next build issues, non-urgent fixes for pre-production hardware, unit test fixes, spelling and debug message improvements" * tag 'cxl-for-6.3' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl: (66 commits) dax/kmem: Fix leak of memory-hotplug resources cxl/mem: Add kdoc param for event log driver state cxl/trace: Add serial number to trace points cxl/trace: Add host output to trace points cxl/trace: Standardize device information output cxl/pci: Remove locked check for dvsec_range_allowed() cxl/hdm: Add emulation when HDM decoders are not committed cxl/hdm: Create emulated cxl_hdm for devices that do not have HDM decoders cxl/hdm: Emulate HDM decoder from DVSEC range registers cxl/pci: Refactor cxl_hdm_decode_init() cxl/port: Export cxl_dvsec_rr_decode() to cxl_port cxl/pci: Break out range register decoding from cxl_hdm_decode_init() cxl: add RAS status unmasking for CXL cxl: remove unnecessary calling of pci_enable_pcie_error_reporting() dax/hmem: build hmem device support as module if possible dax: cxl: add CXL_REGION dependency cxl: avoid returning uninitialized error code cxl/pmem: Fix nvdimm registration races cxl/mem: Fix UAPI command comment cxl/uapi: Tag commands from cxl_query_cmd() ...
Diffstat (limited to 'drivers/cxl')
-rw-r--r--drivers/cxl/Kconfig14
-rw-r--r--drivers/cxl/acpi.c5
-rw-r--r--drivers/cxl/core/Makefile3
-rw-r--r--drivers/cxl/core/core.h7
-rw-r--r--drivers/cxl/core/hdm.c144
-rw-r--r--drivers/cxl/core/mbox.c263
-rw-r--r--drivers/cxl/core/memdev.c3
-rw-r--r--drivers/cxl/core/pci.c316
-rw-r--r--drivers/cxl/core/port.c123
-rw-r--r--drivers/cxl/core/region.c878
-rw-r--r--drivers/cxl/core/trace.c5
-rw-r--r--drivers/cxl/core/trace.h606
-rw-r--r--drivers/cxl/cxl.h96
-rw-r--r--drivers/cxl/cxlmem.h186
-rw-r--r--drivers/cxl/cxlpci.h12
-rw-r--r--drivers/cxl/pci.c425
-rw-r--r--drivers/cxl/pmem.c1
-rw-r--r--drivers/cxl/port.c119
18 files changed, 2777 insertions, 429 deletions
diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig
index 0ac53c422c31..ff4e78117b31 100644
--- a/drivers/cxl/Kconfig
+++ b/drivers/cxl/Kconfig
@@ -104,19 +104,29 @@ config CXL_SUSPEND
depends on SUSPEND && CXL_MEM
config CXL_REGION
- bool
+ bool "CXL: Region Support"
default CXL_BUS
# For MAX_PHYSMEM_BITS
depends on SPARSEMEM
select MEMREGION
select GET_FREE_REGION
+ help
+ Enable the CXL core to enumerate and provision CXL regions. A CXL
+ region is defined by one or more CXL expanders that decode a given
+ system-physical address range. For CXL regions established by
+ platform-firmware this option enables memory error handling to
+ identify the devices participating in a given interleaved memory
+ range. Otherwise, platform-firmware managed CXL is enabled by being
+ placed in the system address map and does not need a driver.
+
+ If unsure say 'y'
config CXL_REGION_INVALIDATION_TEST
bool "CXL: Region Cache Management Bypass (TEST)"
depends on CXL_REGION
help
CXL Region management and security operations potentially invalidate
- the content of CPU caches without notifiying those caches to
+ the content of CPU caches without notifying those caches to
invalidate the affected cachelines. The CXL Region driver attempts
to invalidate caches when those events occur. If that invalidation
fails the region will fail to enable. Reasons for cache
diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index 13cde44c6086..7e1765b09e04 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -19,7 +19,7 @@ struct cxl_cxims_data {
/*
* Find a targets entry (n) in the host bridge interleave list.
- * CXL Specfication 3.0 Table 9-22
+ * CXL Specification 3.0 Table 9-22
*/
static int cxl_xor_calc_n(u64 hpa, struct cxl_cxims_data *cximsd, int iw,
int ig)
@@ -731,7 +731,8 @@ static void __exit cxl_acpi_exit(void)
cxl_bus_drain();
}
-module_init(cxl_acpi_init);
+/* load before dax_hmem sees 'Soft Reserved' CXL ranges */
+subsys_initcall(cxl_acpi_init);
module_exit(cxl_acpi_exit);
MODULE_LICENSE("GPL v2");
MODULE_IMPORT_NS(CXL);
diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
index 79c7257f4107..ca4ae31d8f57 100644
--- a/drivers/cxl/core/Makefile
+++ b/drivers/cxl/core/Makefile
@@ -3,6 +3,8 @@ obj-$(CONFIG_CXL_BUS) += cxl_core.o
obj-$(CONFIG_CXL_SUSPEND) += suspend.o
ccflags-y += -I$(srctree)/drivers/cxl
+CFLAGS_trace.o = -DTRACE_INCLUDE_PATH=. -I$(src)
+
cxl_core-y := port.o
cxl_core-y += pmem.o
cxl_core-y += regs.o
@@ -10,4 +12,5 @@ cxl_core-y += memdev.o
cxl_core-y += mbox.o
cxl_core-y += pci.o
cxl_core-y += hdm.o
+cxl_core-$(CONFIG_TRACING) += trace.o
cxl_core-$(CONFIG_CXL_REGION) += region.o
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index 8c04672dca56..cde475e13216 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -11,15 +11,18 @@ extern struct attribute_group cxl_base_attribute_group;
#ifdef CONFIG_CXL_REGION
extern struct device_attribute dev_attr_create_pmem_region;
+extern struct device_attribute dev_attr_create_ram_region;
extern struct device_attribute dev_attr_delete_region;
extern struct device_attribute dev_attr_region;
extern const struct device_type cxl_pmem_region_type;
+extern const struct device_type cxl_dax_region_type;
extern const struct device_type cxl_region_type;
void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled);
#define CXL_REGION_ATTR(x) (&dev_attr_##x.attr)
#define CXL_REGION_TYPE(x) (&cxl_region_type)
#define SET_CXL_REGION_ATTR(x) (&dev_attr_##x.attr),
#define CXL_PMEM_REGION_TYPE(x) (&cxl_pmem_region_type)
+#define CXL_DAX_REGION_TYPE(x) (&cxl_dax_region_type)
int cxl_region_init(void);
void cxl_region_exit(void);
#else
@@ -37,6 +40,7 @@ static inline void cxl_region_exit(void)
#define CXL_REGION_TYPE(x) NULL
#define SET_CXL_REGION_ATTR(x)
#define CXL_PMEM_REGION_TYPE(x) NULL
+#define CXL_DAX_REGION_TYPE(x) NULL
#endif
struct cxl_send_command;
@@ -56,9 +60,6 @@ resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled);
resource_size_t cxl_dpa_resource_start(struct cxl_endpoint_decoder *cxled);
extern struct rw_semaphore cxl_dpa_rwsem;
-bool is_switch_decoder(struct device *dev);
-struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev);
-
int cxl_memdev_init(void);
void cxl_memdev_exit(void);
void cxl_mbox_init(void);
diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index dcc16d7cb8f3..45deda18ed32 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -101,11 +101,34 @@ static int map_hdm_decoder_regs(struct cxl_port *port, void __iomem *crb,
BIT(CXL_CM_CAP_CAP_ID_HDM));
}
+static struct cxl_hdm *devm_cxl_setup_emulated_hdm(struct cxl_port *port,
+ struct cxl_endpoint_dvsec_info *info)
+{
+ struct device *dev = &port->dev;
+ struct cxl_hdm *cxlhdm;
+
+ if (!info->mem_enabled)
+ return ERR_PTR(-ENODEV);
+
+ cxlhdm = devm_kzalloc(dev, sizeof(*cxlhdm), GFP_KERNEL);
+ if (!cxlhdm)
+ return ERR_PTR(-ENOMEM);
+
+ cxlhdm->port = port;
+ cxlhdm->decoder_count = info->ranges;
+ cxlhdm->target_count = info->ranges;
+ dev_set_drvdata(&port->dev, cxlhdm);
+
+ return cxlhdm;
+}
+
/**
* devm_cxl_setup_hdm - map HDM decoder component registers
* @port: cxl_port to map
+ * @info: cached DVSEC range register info
*/
-struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port)
+struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port,
+ struct cxl_endpoint_dvsec_info *info)
{
struct device *dev = &port->dev;
struct cxl_hdm *cxlhdm;
@@ -119,6 +142,9 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port)
cxlhdm->port = port;
crb = ioremap(port->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE);
if (!crb) {
+ if (info && info->mem_enabled)
+ return devm_cxl_setup_emulated_hdm(port, info);
+
dev_err(dev, "No component registers mapped\n");
return ERR_PTR(-ENXIO);
}
@@ -279,7 +305,7 @@ success:
return 0;
}
-static int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
+int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
resource_size_t base, resource_size_t len,
resource_size_t skipped)
{
@@ -295,6 +321,7 @@ static int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
return devm_add_action_or_reset(&port->dev, cxl_dpa_release, cxled);
}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_dpa_reserve, CXL);
resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled)
{
@@ -676,12 +703,71 @@ static int cxl_decoder_reset(struct cxl_decoder *cxld)
port->commit_end--;
cxld->flags &= ~CXL_DECODER_F_ENABLE;
+ /* Userspace is now responsible for reconfiguring this decoder */
+ if (is_endpoint_decoder(&cxld->dev)) {
+ struct cxl_endpoint_decoder *cxled;
+
+ cxled = to_cxl_endpoint_decoder(&cxld->dev);
+ cxled->state = CXL_DECODER_STATE_MANUAL;
+ }
+
+ return 0;
+}
+
+static int cxl_setup_hdm_decoder_from_dvsec(struct cxl_port *port,
+ struct cxl_decoder *cxld, int which,
+ struct cxl_endpoint_dvsec_info *info)
+{
+ if (!is_cxl_endpoint(port))
+ return -EOPNOTSUPP;
+
+ if (!range_len(&info->dvsec_range[which]))
+ return -ENOENT;
+
+ cxld->target_type = CXL_DECODER_EXPANDER;
+ cxld->commit = NULL;
+ cxld->reset = NULL;
+ cxld->hpa_range = info->dvsec_range[which];
+
+ /*
+ * Set the emulated decoder as locked pending additional support to
+ * change the range registers at run time.
+ */
+ cxld->flags |= CXL_DECODER_F_ENABLE | CXL_DECODER_F_LOCK;
+ port->commit_end = cxld->id;
+
return 0;
}
+static bool should_emulate_decoders(struct cxl_port *port)
+{
+ struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev);
+ void __iomem *hdm = cxlhdm->regs.hdm_decoder;
+ u32 ctrl;
+ int i;
+
+ if (!is_cxl_endpoint(cxlhdm->port))
+ return false;
+
+ if (!hdm)
+ return true;
+
+ /*
+ * If any decoders are committed already, there should not be any
+ * emulated DVSEC decoders.
+ */
+ for (i = 0; i < cxlhdm->decoder_count; i++) {
+ ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(i));
+ if (FIELD_GET(CXL_HDM_DECODER0_CTRL_COMMITTED, ctrl))
+ return false;
+ }
+
+ return true;
+}
+
static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
int *target_map, void __iomem *hdm, int which,
- u64 *dpa_base)
+ u64 *dpa_base, struct cxl_endpoint_dvsec_info *info)
{
struct cxl_endpoint_decoder *cxled = NULL;
u64 size, base, skip, dpa_size;
@@ -694,6 +780,9 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
unsigned char target_id[8];
} target_list;
+ if (should_emulate_decoders(port))
+ return cxl_setup_hdm_decoder_from_dvsec(port, cxld, which, info);
+
if (is_endpoint_decoder(&cxld->dev))
cxled = to_cxl_endpoint_decoder(&cxld->dev);
@@ -717,6 +806,9 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
.end = base + size - 1,
};
+ if (cxled && !committed && range_len(&info->dvsec_range[which]))
+ return cxl_setup_hdm_decoder_from_dvsec(port, cxld, which, info);
+
/* decoders are enabled if committed */
if (committed) {
cxld->flags |= CXL_DECODER_F_ENABLE;
@@ -783,21 +875,21 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
return rc;
}
*dpa_base += dpa_size + skip;
+
+ cxled->state = CXL_DECODER_STATE_AUTO;
+
return 0;
}
-/**
- * devm_cxl_enumerate_decoders - add decoder objects per HDM register set
- * @cxlhdm: Structure to populate with HDM capabilities
- */
-int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm)
+static void cxl_settle_decoders(struct cxl_hdm *cxlhdm)
{
void __iomem *hdm = cxlhdm->regs.hdm_decoder;
- struct cxl_port *port = cxlhdm->port;
- int i, committed;
- u64 dpa_base = 0;
+ int committed, i;
u32 ctrl;
+ if (!hdm)
+ return;
+
/*
* Since the register resource was recently claimed via request_region()
* be careful about trusting the "not-committed" status until the commit
@@ -814,6 +906,22 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm)
/* ensure that future checks of committed can be trusted */
if (committed != cxlhdm->decoder_count)
msleep(20);
+}
+
+/**
+ * devm_cxl_enumerate_decoders - add decoder objects per HDM register set
+ * @cxlhdm: Structure to populate with HDM capabilities
+ * @info: cached DVSEC range register info
+ */
+int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm,
+ struct cxl_endpoint_dvsec_info *info)
+{
+ void __iomem *hdm = cxlhdm->regs.hdm_decoder;
+ struct cxl_port *port = cxlhdm->port;
+ int i;
+ u64 dpa_base = 0;
+
+ cxl_settle_decoders(cxlhdm);
for (i = 0; i < cxlhdm->decoder_count; i++) {
int target_map[CXL_DECODER_MAX_INTERLEAVE] = { 0 };
@@ -826,7 +934,8 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm)
cxled = cxl_endpoint_decoder_alloc(port);
if (IS_ERR(cxled)) {
dev_warn(&port->dev,
- "Failed to allocate the decoder\n");
+ "Failed to allocate decoder%d.%d\n",
+ port->id, i);
return PTR_ERR(cxled);
}
cxld = &cxled->cxld;
@@ -836,21 +945,26 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm)
cxlsd = cxl_switch_decoder_alloc(port, target_count);
if (IS_ERR(cxlsd)) {
dev_warn(&port->dev,
- "Failed to allocate the decoder\n");
+ "Failed to allocate decoder%d.%d\n",
+ port->id, i);
return PTR_ERR(cxlsd);
}
cxld = &cxlsd->cxld;
}
- rc = init_hdm_decoder(port, cxld, target_map, hdm, i, &dpa_base);
+ rc = init_hdm_decoder(port, cxld, target_map, hdm, i,
+ &dpa_base, info);
if (rc) {
+ dev_warn(&port->dev,
+ "Failed to initialize decoder%d.%d\n",
+ port->id, i);
put_device(&cxld->dev);
return rc;
}
rc = add_hdm_decoder(port, cxld, target_map);
if (rc) {
dev_warn(&port->dev,
- "Failed to add decoder to port\n");
+ "Failed to add decoder%d.%d\n", port->id, i);
return rc;
}
}
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index b03fba212799..f2addb457172 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -3,11 +3,13 @@
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/security.h>
#include <linux/debugfs.h>
+#include <linux/ktime.h>
#include <linux/mutex.h>
#include <cxlmem.h>
#include <cxl.h>
#include "core.h"
+#include "trace.h"
static bool cxl_raw_allow_all;
@@ -170,6 +172,12 @@ int cxl_internal_send_cmd(struct cxl_dev_state *cxlds,
out_size = mbox_cmd->size_out;
min_out = mbox_cmd->min_out;
rc = cxlds->mbox_send(cxlds, mbox_cmd);
+ /*
+ * EIO is reserved for a payload size mismatch and mbox_send()
+ * may not return this error.
+ */
+ if (WARN_ONCE(rc == -EIO, "Bad return code: -EIO"))
+ return -ENXIO;
if (rc)
return rc;
@@ -445,9 +453,14 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd,
* structures.
*/
cxl_for_each_cmd(cmd) {
- const struct cxl_command_info *info = &cmd->info;
+ struct cxl_command_info info = cmd->info;
- if (copy_to_user(&q->commands[j++], info, sizeof(*info)))
+ if (test_bit(info.id, cxlmd->cxlds->enabled_cmds))
+ info.flags |= CXL_MEM_COMMAND_FLAG_ENABLED;
+ if (test_bit(info.id, cxlmd->cxlds->exclusive_cmds))
+ info.flags |= CXL_MEM_COMMAND_FLAG_EXCLUSIVE;
+
+ if (copy_to_user(&q->commands[j++], &info, sizeof(info)))
return -EFAULT;
if (j == n_commands)
@@ -550,9 +563,9 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s)
return 0;
}
-static int cxl_xfer_log(struct cxl_dev_state *cxlds, uuid_t *uuid, u32 size, u8 *out)
+static int cxl_xfer_log(struct cxl_dev_state *cxlds, uuid_t *uuid, u32 *size, u8 *out)
{
- u32 remaining = size;
+ u32 remaining = *size;
u32 offset = 0;
while (remaining) {
@@ -576,6 +589,17 @@ static int cxl_xfer_log(struct cxl_dev_state *cxlds, uuid_t *uuid, u32 size, u8
};
rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+
+ /*
+ * The output payload length that indicates the number
+ * of valid bytes can be smaller than the Log buffer
+ * size.
+ */
+ if (rc == -EIO && mbox_cmd.size_out < xfer_size) {
+ offset += mbox_cmd.size_out;
+ break;
+ }
+
if (rc < 0)
return rc;
@@ -584,6 +608,8 @@ static int cxl_xfer_log(struct cxl_dev_state *cxlds, uuid_t *uuid, u32 size, u8
offset += xfer_size;
}
+ *size = offset;
+
return 0;
}
@@ -610,11 +636,12 @@ static void cxl_walk_cel(struct cxl_dev_state *cxlds, size_t size, u8 *cel)
if (!cmd) {
dev_dbg(cxlds->dev,
- "Opcode 0x%04x unsupported by driver", opcode);
+ "Opcode 0x%04x unsupported by driver\n", opcode);
continue;
}
set_bit(cmd->info.id, cxlds->enabled_cmds);
+ dev_dbg(cxlds->dev, "Opcode 0x%04x enabled\n", opcode);
}
}
@@ -694,7 +721,7 @@ int cxl_enumerate_cmds(struct cxl_dev_state *cxlds)
goto out;
}
- rc = cxl_xfer_log(cxlds, &uuid, size, log);
+ rc = cxl_xfer_log(cxlds, &uuid, &size, log);
if (rc) {
kvfree(log);
goto out;
@@ -717,6 +744,203 @@ out:
}
EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, CXL);
+/*
+ * General Media Event Record
+ * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
+ */
+static const uuid_t gen_media_event_uuid =
+ UUID_INIT(0xfbcd0a77, 0xc260, 0x417f,
+ 0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6);
+
+/*
+ * DRAM Event Record
+ * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44
+ */
+static const uuid_t dram_event_uuid =
+ UUID_INIT(0x601dcbb3, 0x9c06, 0x4eab,
+ 0xb8, 0xaf, 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24);
+
+/*
+ * Memory Module Event Record
+ * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45
+ */
+static const uuid_t mem_mod_event_uuid =
+ UUID_INIT(0xfe927475, 0xdd59, 0x4339,
+ 0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74);
+
+static void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
+ enum cxl_event_log_type type,
+ struct cxl_event_record_raw *record)
+{
+ uuid_t *id = &record->hdr.id;
+
+ if (uuid_equal(id, &gen_media_event_uuid)) {
+ struct cxl_event_gen_media *rec =
+ (struct cxl_event_gen_media *)record;
+
+ trace_cxl_general_media(cxlmd, type, rec);
+ } else if (uuid_equal(id, &dram_event_uuid)) {
+ struct cxl_event_dram *rec = (struct cxl_event_dram *)record;
+
+ trace_cxl_dram(cxlmd, type, rec);
+ } else if (uuid_equal(id, &mem_mod_event_uuid)) {
+ struct cxl_event_mem_module *rec =
+ (struct cxl_event_mem_module *)record;
+
+ trace_cxl_memory_module(cxlmd, type, rec);
+ } else {
+ /* For unknown record types print just the header */
+ trace_cxl_generic_event(cxlmd, type, record);
+ }
+}
+
+static int cxl_clear_event_record(struct cxl_dev_state *cxlds,
+ enum cxl_event_log_type log,
+ struct cxl_get_event_payload *get_pl)
+{
+ struct cxl_mbox_clear_event_payload *payload;
+ u16 total = le16_to_cpu(get_pl->record_count);
+ u8 max_handles = CXL_CLEAR_EVENT_MAX_HANDLES;
+ size_t pl_size = struct_size(payload, handles, max_handles);
+ struct cxl_mbox_cmd mbox_cmd;
+ u16 cnt;
+ int rc = 0;
+ int i;
+
+ /* Payload size may limit the max handles */
+ if (pl_size > cxlds->payload_size) {
+ max_handles = (cxlds->payload_size - sizeof(*payload)) /
+ sizeof(__le16);
+ pl_size = struct_size(payload, handles, max_handles);
+ }
+
+ payload = kvzalloc(pl_size, GFP_KERNEL);
+ if (!payload)
+ return -ENOMEM;
+
+ *payload = (struct cxl_mbox_clear_event_payload) {
+ .event_log = log,
+ };
+
+ mbox_cmd = (struct cxl_mbox_cmd) {
+ .opcode = CXL_MBOX_OP_CLEAR_EVENT_RECORD,
+ .payload_in = payload,
+ .size_in = pl_size,
+ };
+
+ /*
+ * Clear Event Records uses u8 for the handle cnt while Get Event
+ * Record can return up to 0xffff records.
+ */
+ i = 0;
+ for (cnt = 0; cnt < total; cnt++) {
+ payload->handles[i++] = get_pl->records[cnt].hdr.handle;
+ dev_dbg(cxlds->dev, "Event log '%d': Clearing %u\n",
+ log, le16_to_cpu(payload->handles[i]));
+
+ if (i == max_handles) {
+ payload->nr_recs = i;
+ rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+ if (rc)
+ goto free_pl;
+ i = 0;
+ }
+ }
+
+ /* Clear what is left if any */
+ if (i) {
+ payload->nr_recs = i;
+ mbox_cmd.size_in = struct_size(payload, handles, i);
+ rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+ if (rc)
+ goto free_pl;
+ }
+
+free_pl:
+ kvfree(payload);
+ return rc;
+}
+
+static void cxl_mem_get_records_log(struct cxl_dev_state *cxlds,
+ enum cxl_event_log_type type)
+{
+ struct cxl_get_event_payload *payload;
+ struct cxl_mbox_cmd mbox_cmd;
+ u8 log_type = type;
+ u16 nr_rec;
+
+ mutex_lock(&cxlds->event.log_lock);
+ payload = cxlds->event.buf;
+
+ mbox_cmd = (struct cxl_mbox_cmd) {
+ .opcode = CXL_MBOX_OP_GET_EVENT_RECORD,
+ .payload_in = &log_type,
+ .size_in = sizeof(log_type),
+ .payload_out = payload,
+ .size_out = cxlds->payload_size,
+ .min_out = struct_size(payload, records, 0),
+ };
+
+ do {
+ int rc, i;
+
+ rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+ if (rc) {
+ dev_err_ratelimited(cxlds->dev,
+ "Event log '%d': Failed to query event records : %d",
+ type, rc);
+ break;
+ }
+
+ nr_rec = le16_to_cpu(payload->record_count);
+ if (!nr_rec)
+ break;
+
+ for (i = 0; i < nr_rec; i++)
+ cxl_event_trace_record(cxlds->cxlmd, type,
+ &payload->records[i]);
+
+ if (payload->flags & CXL_GET_EVENT_FLAG_OVERFLOW)
+ trace_cxl_overflow(cxlds->cxlmd, type, payload);
+
+ rc = cxl_clear_event_record(cxlds, type, payload);
+ if (rc) {
+ dev_err_ratelimited(cxlds->dev,
+ "Event log '%d': Failed to clear events : %d",
+ type, rc);
+ break;
+ }
+ } while (nr_rec);
+
+ mutex_unlock(&cxlds->event.log_lock);
+}
+
+/**
+ * cxl_mem_get_event_records - Get Event Records from the device
+ * @cxlds: The device data for the operation
+ * @status: Event Status register value identifying which events are available.
+ *
+ * Retrieve all event records available on the device, report them as trace
+ * events, and clear them.
+ *
+ * See CXL rev 3.0 @8.2.9.2.2 Get Event Records
+ * See CXL rev 3.0 @8.2.9.2.3 Clear Event Records
+ */
+void cxl_mem_get_event_records(struct cxl_dev_state *cxlds, u32 status)
+{
+ dev_dbg(cxlds->dev, "Reading event logs: %x\n", status);
+
+ if (status & CXLDEV_EVENT_STATUS_FATAL)
+ cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_FATAL);
+ if (status & CXLDEV_EVENT_STATUS_FAIL)
+ cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_FAIL);
+ if (status & CXLDEV_EVENT_STATUS_WARN)
+ cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_WARN);
+ if (status & CXLDEV_EVENT_STATUS_INFO)
+ cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_INFO);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_mem_get_event_records, CXL);
+
/**
* cxl_mem_get_partition_info - Get partition info
* @cxlds: The device data for the operation
@@ -857,6 +1081,32 @@ int cxl_mem_create_range_info(struct cxl_dev_state *cxlds)
}
EXPORT_SYMBOL_NS_GPL(cxl_mem_create_range_info, CXL);
+int cxl_set_timestamp(struct cxl_dev_state *cxlds)
+{
+ struct cxl_mbox_cmd mbox_cmd;
+ struct cxl_mbox_set_timestamp_in pi;
+ int rc;
+
+ pi.timestamp = cpu_to_le64(ktime_get_real_ns());
+ mbox_cmd = (struct cxl_mbox_cmd) {
+ .opcode = CXL_MBOX_OP_SET_TIMESTAMP,
+ .size_in = sizeof(pi),
+ .payload_in = &pi,
+ };
+
+ rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+ /*
+ * Command is optional. Devices may have another way of providing
+ * a timestamp, or may return all 0s in timestamp fields.
+ * Don't report an error if this command isn't supported
+ */
+ if (rc && (mbox_cmd.return_code != CXL_MBOX_CMD_RC_UNSUPPORTED))
+ return rc;
+
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_set_timestamp, CXL);
+
struct cxl_dev_state *cxl_dev_state_create(struct device *dev)
{
struct cxl_dev_state *cxlds;
@@ -868,6 +1118,7 @@ struct cxl_dev_state *cxl_dev_state_create(struct device *dev)
}
mutex_init(&cxlds->mbox_mutex);
+ mutex_init(&cxlds->event.log_lock);
cxlds->dev = dev;
return cxlds;
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index b75dd77df7ce..28a05f2fe32d 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -242,10 +242,11 @@ static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds,
if (!cxlmd)
return ERR_PTR(-ENOMEM);
- rc = ida_alloc_range(&cxl_memdev_ida, 0, CXL_MEM_MAX_DEVS, GFP_KERNEL);
+ rc = ida_alloc_max(&cxl_memdev_ida, CXL_MEM_MAX_DEVS - 1, GFP_KERNEL);
if (rc < 0)
goto err;
cxlmd->id = rc;
+ cxlmd->depth = -1;
dev = &cxlmd->dev;
device_initialize(dev);
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 57764e9cd19d..7328a2552411 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -9,6 +9,7 @@
#include <cxlmem.h>
#include <cxl.h>
#include "core.h"
+#include "trace.h"
/**
* DOC: cxl core pci
@@ -141,11 +142,10 @@ int cxl_await_media_ready(struct cxl_dev_state *cxlds)
}
EXPORT_SYMBOL_NS_GPL(cxl_await_media_ready, CXL);
-static int wait_for_valid(struct cxl_dev_state *cxlds)
+static int wait_for_valid(struct pci_dev *pdev, int d)
{
- struct pci_dev *pdev = to_pci_dev(cxlds->dev);
- int d = cxlds->cxl_dvsec, rc;
u32 val;
+ int rc;
/*
* Memory_Info_Valid: When set, indicates that the CXL Range 1 Size high
@@ -213,11 +213,6 @@ static int devm_cxl_enable_mem(struct device *host, struct cxl_dev_state *cxlds)
return devm_add_action_or_reset(host, clear_mem_enable, cxlds);
}
-static bool range_contains(struct range *r1, struct range *r2)
-{
- return r1->start <= r2->start && r1->end >= r2->end;
-}
-
/* require dvsec ranges to be covered by a locked platform window */
static int dvsec_range_allowed(struct device *dev, void *arg)
{
@@ -229,8 +224,6 @@ static int dvsec_range_allowed(struct device *dev, void *arg)
cxld = to_cxl_decoder(dev);
- if (!(cxld->flags & CXL_DECODER_F_LOCK))
- return 0;
if (!(cxld->flags & CXL_DECODER_F_RAM))
return 0;
@@ -260,94 +253,11 @@ static int devm_cxl_enable_hdm(struct device *host, struct cxl_hdm *cxlhdm)
return devm_add_action_or_reset(host, disable_hdm, cxlhdm);
}
-static bool __cxl_hdm_decode_init(struct cxl_dev_state *cxlds,
- struct cxl_hdm *cxlhdm,
- struct cxl_endpoint_dvsec_info *info)
+int cxl_dvsec_rr_decode(struct device *dev, int d,
+ struct cxl_endpoint_dvsec_info *info)
{
- void __iomem *hdm = cxlhdm->regs.hdm_decoder;
- struct cxl_port *port = cxlhdm->port;
- struct device *dev = cxlds->dev;
- struct cxl_port *root;
- int i, rc, allowed;
- u32 global_ctrl;
-
- global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET);
-
- /*
- * If the HDM Decoder Capability is already enabled then assume
- * that some other agent like platform firmware set it up.
- */
- if (global_ctrl & CXL_HDM_DECODER_ENABLE) {
- rc = devm_cxl_enable_mem(&port->dev, cxlds);
- if (rc)
- return false;
- return true;
- }
-
- root = to_cxl_port(port->dev.parent);
- while (!is_cxl_root(root) && is_cxl_port(root->dev.parent))
- root = to_cxl_port(root->dev.parent);
- if (!is_cxl_root(root)) {
- dev_err(dev, "Failed to acquire root port for HDM enable\n");
- return false;
- }
-
- for (i = 0, allowed = 0; info->mem_enabled && i < info->ranges; i++) {
- struct device *cxld_dev;
-
- cxld_dev = device_find_child(&root->dev, &info->dvsec_range[i],
- dvsec_range_allowed);
- if (!cxld_dev) {
- dev_dbg(dev, "DVSEC Range%d denied by platform\n", i);
- continue;
- }
- dev_dbg(dev, "DVSEC Range%d allowed by platform\n", i);
- put_device(cxld_dev);
- allowed++;
- }
-
- if (!allowed) {
- cxl_set_mem_enable(cxlds, 0);
- info->mem_enabled = 0;
- }
-
- /*
- * Per CXL 2.0 Section 8.1.3.8.3 and 8.1.3.8.4 DVSEC CXL Range 1 Base
- * [High,Low] when HDM operation is enabled the range register values
- * are ignored by the device, but the spec also recommends matching the
- * DVSEC Range 1,2 to HDM Decoder Range 0,1. So, non-zero info->ranges
- * are expected even though Linux does not require or maintain that
- * match. If at least one DVSEC range is enabled and allowed, skip HDM
- * Decoder Capability Enable.
- */
- if (info->mem_enabled)
- return false;
-
- rc = devm_cxl_enable_hdm(&port->dev, cxlhdm);
- if (rc)
- return false;
-
- rc = devm_cxl_enable_mem(&port->dev, cxlds);
- if (rc)
- return false;
-
- return true;
-}
-
-/**
- * cxl_hdm_decode_init() - Setup HDM decoding for the endpoint
- * @cxlds: Device state
- * @cxlhdm: Mapped HDM decoder Capability
- *
- * Try to enable the endpoint's HDM Decoder Capability
- */
-int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm)
-{
- struct pci_dev *pdev = to_pci_dev(cxlds->dev);
- struct cxl_endpoint_dvsec_info info = { 0 };
+ struct pci_dev *pdev = to_pci_dev(dev);
int hdm_count, rc, i, ranges = 0;
- struct device *dev = &pdev->dev;
- int d = cxlds->cxl_dvsec;
u16 cap, ctrl;
if (!d) {
@@ -378,7 +288,7 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm)
if (!hdm_count || hdm_count > 2)
return -EINVAL;
- rc = wait_for_valid(cxlds);
+ rc = wait_for_valid(pdev, d);
if (rc) {
dev_dbg(dev, "Failure awaiting MEM_INFO_VALID (%d)\n", rc);
return rc;
@@ -389,9 +299,9 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm)
* disabled, and they will remain moot after the HDM Decoder
* capability is enabled.
*/
- info.mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl);
- if (!info.mem_enabled)
- goto hdm_init;
+ info->mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl);
+ if (!info->mem_enabled)
+ return 0;
for (i = 0; i < hdm_count; i++) {
u64 base, size;
@@ -410,6 +320,13 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm)
return rc;
size |= temp & CXL_DVSEC_MEM_SIZE_LOW_MASK;
+ if (!size) {
+ info->dvsec_range[i] = (struct range) {
+ .start = 0,
+ .end = CXL_RESOURCE_NONE,
+ };
+ continue;
+ }
rc = pci_read_config_dword(
pdev, d + CXL_DVSEC_RANGE_BASE_HIGH(i), &temp);
@@ -425,29 +342,94 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm)
base |= temp & CXL_DVSEC_MEM_BASE_LOW_MASK;
- info.dvsec_range[i] = (struct range) {
+ info->dvsec_range[i] = (struct range) {
.start = base,
.end = base + size - 1
};
- if (size)
- ranges++;
+ ranges++;
}
- info.ranges = ranges;
+ info->ranges = ranges;
+
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_dvsec_rr_decode, CXL);
+
+/**
+ * cxl_hdm_decode_init() - Setup HDM decoding for the endpoint
+ * @cxlds: Device state
+ * @cxlhdm: Mapped HDM decoder Capability
+ * @info: Cached DVSEC range registers info
+ *
+ * Try to enable the endpoint's HDM Decoder Capability
+ */
+int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm,
+ struct cxl_endpoint_dvsec_info *info)
+{
+ void __iomem *hdm = cxlhdm->regs.hdm_decoder;
+ struct cxl_port *port = cxlhdm->port;
+ struct device *dev = cxlds->dev;
+ struct cxl_port *root;
+ int i, rc, allowed;
+ u32 global_ctrl = 0;
+
+ if (hdm)
+ global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET);
/*
- * If DVSEC ranges are being used instead of HDM decoder registers there
- * is no use in trying to manage those.
+ * If the HDM Decoder Capability is already enabled then assume
+ * that some other agent like platform firmware set it up.
*/
-hdm_init:
- if (!__cxl_hdm_decode_init(cxlds, cxlhdm, &info)) {
- dev_err(dev,
- "Legacy range registers configuration prevents HDM operation.\n");
- return -EBUSY;
+ if (global_ctrl & CXL_HDM_DECODER_ENABLE || (!hdm && info->mem_enabled))
+ return devm_cxl_enable_mem(&port->dev, cxlds);
+ else if (!hdm)
+ return -ENODEV;
+
+ root = to_cxl_port(port->dev.parent);
+ while (!is_cxl_root(root) && is_cxl_port(root->dev.parent))
+ root = to_cxl_port(root->dev.parent);
+ if (!is_cxl_root(root)) {
+ dev_err(dev, "Failed to acquire root port for HDM enable\n");
+ return -ENODEV;
}
- return 0;
+ for (i = 0, allowed = 0; info->mem_enabled && i < info->ranges; i++) {
+ struct device *cxld_dev;
+
+ cxld_dev = device_find_child(&root->dev, &info->dvsec_range[i],
+ dvsec_range_allowed);
+ if (!cxld_dev) {
+ dev_dbg(dev, "DVSEC Range%d denied by platform\n", i);
+ continue;
+ }
+ dev_dbg(dev, "DVSEC Range%d allowed by platform\n", i);
+ put_device(cxld_dev);
+ allowed++;
+ }
+
+ if (!allowed) {
+ cxl_set_mem_enable(cxlds, 0);
+ info->mem_enabled = 0;
+ }
+
+ /*
+ * Per CXL 2.0 Section 8.1.3.8.3 and 8.1.3.8.4 DVSEC CXL Range 1 Base
+ * [High,Low] when HDM operation is enabled the range register values
+ * are ignored by the device, but the spec also recommends matching the
+ * DVSEC Range 1,2 to HDM Decoder Range 0,1. So, non-zero info->ranges
+ * are expected even though Linux does not require or maintain that
+ * match. If at least one DVSEC range is enabled and allowed, skip HDM
+ * Decoder Capability Enable.
+ */
+ if (info->mem_enabled)
+ return 0;
+
+ rc = devm_cxl_enable_hdm(&port->dev, cxlhdm);
+ if (rc)
+ return rc;
+
+ return devm_cxl_enable_mem(&port->dev, cxlds);
}
EXPORT_SYMBOL_NS_GPL(cxl_hdm_decode_init, CXL);
@@ -622,3 +604,113 @@ void read_cdat_data(struct cxl_port *port)
}
}
EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL);
+
+void cxl_cor_error_detected(struct pci_dev *pdev)
+{
+ struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
+ void __iomem *addr;
+ u32 status;
+
+ if (!cxlds->regs.ras)
+ return;
+
+ addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
+ status = readl(addr);
+ if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
+ writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
+ trace_cxl_aer_correctable_error(cxlds->cxlmd, status);
+ }
+}
+EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, CXL);
+
+/* CXL spec rev3.0 8.2.4.16.1 */
+static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log)
+{
+ void __iomem *addr;
+ u32 *log_addr;
+ int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32);
+
+ addr = cxlds->regs.ras + CXL_RAS_HEADER_LOG_OFFSET;
+ log_addr = log;
+
+ for (i = 0; i < log_u32_size; i++) {
+ *log_addr = readl(addr);
+ log_addr++;
+ addr += sizeof(u32);
+ }
+}
+
+/*
+ * Log the state of the RAS status registers and prepare them to log the
+ * next error status. Return 1 if reset needed.
+ */
+static bool cxl_report_and_clear(struct cxl_dev_state *cxlds)
+{
+ u32 hl[CXL_HEADERLOG_SIZE_U32];
+ void __iomem *addr;
+ u32 status;
+ u32 fe;
+
+ if (!cxlds->regs.ras)
+ return false;
+
+ addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
+ status = readl(addr);
+ if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK))
+ return false;
+
+ /* If multiple errors, log header points to first error from ctrl reg */
+ if (hweight32(status) > 1) {
+ void __iomem *rcc_addr =
+ cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET;
+
+ fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK,
+ readl(rcc_addr)));
+ } else {
+ fe = status;
+ }
+
+ header_log_copy(cxlds, hl);
+ trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe, hl);
+ writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr);
+
+ return true;
+}
+
+pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
+ pci_channel_state_t state)
+{
+ struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
+ struct cxl_memdev *cxlmd = cxlds->cxlmd;
+ struct device *dev = &cxlmd->dev;
+ bool ue;
+
+ /*
+ * A frozen channel indicates an impending reset which is fatal to
+ * CXL.mem operation, and will likely crash the system. On the off
+ * chance the situation is recoverable dump the status of the RAS
+ * capability registers and bounce the active state of the memdev.
+ */
+ ue = cxl_report_and_clear(cxlds);
+
+ switch (state) {
+ case pci_channel_io_normal:
+ if (ue) {
+ device_release_driver(dev);
+ return PCI_ERS_RESULT_NEED_RESET;
+ }
+ return PCI_ERS_RESULT_CAN_RECOVER;
+ case pci_channel_io_frozen:
+ dev_warn(&pdev->dev,
+ "%s: frozen state error detected, disable CXL.mem\n",
+ dev_name(dev));
+ device_release_driver(dev);
+ return PCI_ERS_RESULT_NEED_RESET;
+ case pci_channel_io_perm_failure:
+ dev_warn(&pdev->dev,
+ "failure state error detected, request disconnect\n");
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+ return PCI_ERS_RESULT_NEED_RESET;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_error_detected, CXL);
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 3f02dc135a12..8ee6b6e2e2a4 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -46,6 +46,8 @@ static int cxl_device_id(const struct device *dev)
return CXL_DEVICE_NVDIMM;
if (dev->type == CXL_PMEM_REGION_TYPE())
return CXL_DEVICE_PMEM_REGION;
+ if (dev->type == CXL_DAX_REGION_TYPE())
+ return CXL_DEVICE_DAX_REGION;
if (is_cxl_port(dev)) {
if (is_cxl_root(to_cxl_port(dev)))
return CXL_DEVICE_ROOT;
@@ -180,17 +182,7 @@ static ssize_t mode_show(struct device *dev, struct device_attribute *attr,
{
struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev);
- switch (cxled->mode) {
- case CXL_DECODER_RAM:
- return sysfs_emit(buf, "ram\n");
- case CXL_DECODER_PMEM:
- return sysfs_emit(buf, "pmem\n");
- case CXL_DECODER_NONE:
- return sysfs_emit(buf, "none\n");
- case CXL_DECODER_MIXED:
- default:
- return sysfs_emit(buf, "mixed\n");
- }
+ return sysfs_emit(buf, "%s\n", cxl_decoder_mode_name(cxled->mode));
}
static ssize_t mode_store(struct device *dev, struct device_attribute *attr,
@@ -304,6 +296,7 @@ static struct attribute *cxl_decoder_root_attrs[] = {
&dev_attr_cap_type3.attr,
&dev_attr_target_list.attr,
SET_CXL_REGION_ATTR(create_pmem_region)
+ SET_CXL_REGION_ATTR(create_ram_region)
SET_CXL_REGION_ATTR(delete_region)
NULL,
};
@@ -315,6 +308,13 @@ static bool can_create_pmem(struct cxl_root_decoder *cxlrd)
return (cxlrd->cxlsd.cxld.flags & flags) == flags;
}
+static bool can_create_ram(struct cxl_root_decoder *cxlrd)
+{
+ unsigned long flags = CXL_DECODER_F_TYPE3 | CXL_DECODER_F_RAM;
+
+ return (cxlrd->cxlsd.cxld.flags & flags) == flags;
+}
+
static umode_t cxl_root_decoder_visible(struct kobject *kobj, struct attribute *a, int n)
{
struct device *dev = kobj_to_dev(kobj);
@@ -323,7 +323,11 @@ static umode_t cxl_root_decoder_visible(struct kobject *kobj, struct attribute *
if (a == CXL_REGION_ATTR(create_pmem_region) && !can_create_pmem(cxlrd))
return 0;
- if (a == CXL_REGION_ATTR(delete_region) && !can_create_pmem(cxlrd))
+ if (a == CXL_REGION_ATTR(create_ram_region) && !can_create_ram(cxlrd))
+ return 0;
+
+ if (a == CXL_REGION_ATTR(delete_region) &&
+ !(can_create_pmem(cxlrd) || can_create_ram(cxlrd)))
return 0;
return a->mode;
@@ -444,6 +448,7 @@ bool is_endpoint_decoder(struct device *dev)
{
return dev->type == &cxl_decoder_endpoint_type;
}
+EXPORT_SYMBOL_NS_GPL(is_endpoint_decoder, CXL);
bool is_root_decoder(struct device *dev)
{
@@ -455,6 +460,7 @@ bool is_switch_decoder(struct device *dev)
{
return is_root_decoder(dev) || dev->type == &cxl_decoder_switch_type;
}
+EXPORT_SYMBOL_NS_GPL(is_switch_decoder, CXL);
struct cxl_decoder *to_cxl_decoder(struct device *dev)
{
@@ -482,6 +488,7 @@ struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev)
return NULL;
return container_of(dev, struct cxl_switch_decoder, cxld.dev);
}
+EXPORT_SYMBOL_NS_GPL(to_cxl_switch_decoder, CXL);
static void cxl_ep_release(struct cxl_ep *ep)
{
@@ -583,6 +590,29 @@ static int devm_cxl_link_uport(struct device *host, struct cxl_port *port)
return devm_add_action_or_reset(host, cxl_unlink_uport, port);
}
+static void cxl_unlink_parent_dport(void *_port)
+{
+ struct cxl_port *port = _port;
+
+ sysfs_remove_link(&port->dev.kobj, "parent_dport");
+}
+
+static int devm_cxl_link_parent_dport(struct device *host,
+ struct cxl_port *port,
+ struct cxl_dport *parent_dport)
+{
+ int rc;
+
+ if (!parent_dport)
+ return 0;
+
+ rc = sysfs_create_link(&port->dev.kobj, &parent_dport->dport->kobj,
+ "parent_dport");
+ if (rc)
+ return rc;
+ return devm_add_action_or_reset(host, cxl_unlink_parent_dport, port);
+}
+
static struct lock_class_key cxl_port_key;
static struct cxl_port *cxl_port_alloc(struct device *uport,
@@ -692,6 +722,10 @@ static struct cxl_port *__devm_cxl_add_port(struct device *host,
if (rc)
return ERR_PTR(rc);
+ rc = devm_cxl_link_parent_dport(host, port, parent_dport);
+ if (rc)
+ return ERR_PTR(rc);
+
return port;
err:
@@ -1137,7 +1171,7 @@ static struct cxl_port *find_cxl_port_at(struct cxl_port *parent_port,
}
/*
- * All users of grandparent() are using it to walk PCIe-like swich port
+ * All users of grandparent() are using it to walk PCIe-like switch port
* hierarchy. A PCIe switch is comprised of a bridge device representing the
* upstream switch port and N bridges representing downstream switch ports. When
* bridges stack the grand-parent of a downstream switch port is another
@@ -1164,6 +1198,7 @@ static void delete_endpoint(void *data)
device_lock(parent);
if (parent->driver && !endpoint->dead) {
+ devm_release_action(parent, cxl_unlink_parent_dport, endpoint);
devm_release_action(parent, cxl_unlink_uport, endpoint);
devm_release_action(parent, unregister_port, endpoint);
}
@@ -1179,6 +1214,7 @@ int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint)
get_device(&endpoint->dev);
dev_set_drvdata(dev, endpoint);
+ cxlmd->depth = endpoint->depth;
return devm_add_action_or_reset(dev, delete_endpoint, cxlmd);
}
EXPORT_SYMBOL_NS_GPL(cxl_endpoint_autoremove, CXL);
@@ -1194,6 +1230,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_endpoint_autoremove, CXL);
*/
static void delete_switch_port(struct cxl_port *port)
{
+ devm_release_action(port->dev.parent, cxl_unlink_parent_dport, port);
devm_release_action(port->dev.parent, cxl_unlink_uport, port);
devm_release_action(port->dev.parent, unregister_port, port);
}
@@ -1212,50 +1249,55 @@ static void reap_dports(struct cxl_port *port)
}
}
+struct detach_ctx {
+ struct cxl_memdev *cxlmd;
+ int depth;
+};
+
+static int port_has_memdev(struct device *dev, const void *data)
+{
+ const struct detach_ctx *ctx = data;
+ struct cxl_port *port;
+
+ if (!is_cxl_port(dev))
+ return 0;
+
+ port = to_cxl_port(dev);
+ if (port->depth != ctx->depth)
+ return 0;
+
+ return !!cxl_ep_load(port, ctx->cxlmd);
+}
+
static void cxl_detach_ep(void *data)
{
struct cxl_memdev *cxlmd = data;
- struct device *iter;
- for (iter = &cxlmd->dev; iter; iter = grandparent(iter)) {
- struct device *dport_dev = grandparent(iter);
+ for (int i = cxlmd->depth - 1; i >= 1; i--) {
struct cxl_port *port, *parent_port;
+ struct detach_ctx ctx = {
+ .cxlmd = cxlmd,
+ .depth = i,
+ };
+ struct device *dev;
struct cxl_ep *ep;
bool died = false;
- if (!dport_dev)
- break;
-
- port = find_cxl_port(dport_dev, NULL);
- if (!port)
+ dev = bus_find_device(&cxl_bus_type, NULL, &ctx,
+ port_has_memdev);
+ if (!dev)
continue;
-
- if (is_cxl_root(port)) {
- put_device(&port->dev);
- continue;
- }
+ port = to_cxl_port(dev);
parent_port = to_cxl_port(port->dev.parent);
device_lock(&parent_port->dev);
- if (!parent_port->dev.driver) {
- /*
- * The bottom-up race to delete the port lost to a
- * top-down port disable, give up here, because the
- * parent_port ->remove() will have cleaned up all
- * descendants.
- */
- device_unlock(&parent_port->dev);
- put_device(&port->dev);
- continue;
- }
-
device_lock(&port->dev);
ep = cxl_ep_load(port, cxlmd);
dev_dbg(&cxlmd->dev, "disconnect %s from %s\n",
ep ? dev_name(ep->ep) : "", dev_name(&port->dev));
cxl_ep_remove(port, ep);
if (ep && !port->dead && xa_empty(&port->endpoints) &&
- !is_cxl_root(parent_port)) {
+ !is_cxl_root(parent_port) && parent_port->dev.driver) {
/*
* This was the last ep attached to a dynamically
* enumerated port. Block new cxl_add_ep() and garbage
@@ -1591,6 +1633,7 @@ struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port,
}
cxlrd->calc_hb = calc_hb;
+ mutex_init(&cxlrd->range_lock);
cxld = &cxlsd->cxld;
cxld->dev.type = &cxl_decoder_root_type;
@@ -1974,6 +2017,6 @@ static void cxl_core_exit(void)
debugfs_remove_recursive(cxl_debugfs);
}
-module_init(cxl_core_init);
+subsys_initcall(cxl_core_init);
module_exit(cxl_core_exit);
MODULE_LICENSE("GPL v2");
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 940f805b1534..f29028148806 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -6,6 +6,7 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/uuid.h>
+#include <linux/sort.h>
#include <linux/idr.h>
#include <cxlmem.h>
#include <cxl.h>
@@ -45,7 +46,10 @@ static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
rc = down_read_interruptible(&cxl_region_rwsem);
if (rc)
return rc;
- rc = sysfs_emit(buf, "%pUb\n", &p->uuid);
+ if (cxlr->mode != CXL_DECODER_PMEM)
+ rc = sysfs_emit(buf, "\n");
+ else
+ rc = sysfs_emit(buf, "%pUb\n", &p->uuid);
up_read(&cxl_region_rwsem);
return rc;
@@ -157,6 +161,22 @@ static int cxl_region_decode_reset(struct cxl_region *cxlr, int count)
return 0;
}
+static int commit_decoder(struct cxl_decoder *cxld)
+{
+ struct cxl_switch_decoder *cxlsd = NULL;
+
+ if (cxld->commit)
+ return cxld->commit(cxld);
+
+ if (is_switch_decoder(&cxld->dev))
+ cxlsd = to_cxl_switch_decoder(&cxld->dev);
+
+ if (dev_WARN_ONCE(&cxld->dev, !cxlsd || cxlsd->nr_targets > 1,
+ "->commit() is required\n"))
+ return -ENXIO;
+ return 0;
+}
+
static int cxl_region_decode_commit(struct cxl_region *cxlr)
{
struct cxl_region_params *p = &cxlr->params;
@@ -175,8 +195,7 @@ static int cxl_region_decode_commit(struct cxl_region *cxlr)
iter = to_cxl_port(iter->dev.parent)) {
cxl_rr = cxl_rr_load(iter, cxlr);
cxld = cxl_rr->decoder;
- if (cxld->commit)
- rc = cxld->commit(cxld);
+ rc = commit_decoder(cxld);
if (rc)
break;
}
@@ -287,8 +306,12 @@ static umode_t cxl_region_visible(struct kobject *kobj, struct attribute *a,
struct device *dev = kobj_to_dev(kobj);
struct cxl_region *cxlr = to_cxl_region(dev);
+ /*
+ * Support tooling that expects to find a 'uuid' attribute for all
+ * regions regardless of mode.
+ */
if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_DECODER_PMEM)
- return 0;
+ return 0444;
return a->mode;
}
@@ -401,7 +424,7 @@ static ssize_t interleave_granularity_store(struct device *dev,
* When the host-bridge is interleaved, disallow region granularity !=
* root granularity. Regions with a granularity less than the root
* interleave result in needing multiple endpoints to support a single
- * slot in the interleave (possible to suport in the future). Regions
+ * slot in the interleave (possible to support in the future). Regions
* with a granularity greater than the root interleave result in invalid
* DPA translations (invalid to support).
*/
@@ -445,6 +468,15 @@ static ssize_t resource_show(struct device *dev, struct device_attribute *attr,
}
static DEVICE_ATTR_RO(resource);
+static ssize_t mode_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct cxl_region *cxlr = to_cxl_region(dev);
+
+ return sysfs_emit(buf, "%s\n", cxl_decoder_mode_name(cxlr->mode));
+}
+static DEVICE_ATTR_RO(mode);
+
static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size)
{
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
@@ -495,7 +527,12 @@ static void cxl_region_iomem_release(struct cxl_region *cxlr)
if (device_is_registered(&cxlr->dev))
lockdep_assert_held_write(&cxl_region_rwsem);
if (p->res) {
- remove_resource(p->res);
+ /*
+ * Autodiscovered regions may not have been able to insert their
+ * resource.
+ */
+ if (p->res->parent)
+ remove_resource(p->res);
kfree(p->res);
p->res = NULL;
}
@@ -572,6 +609,7 @@ static struct attribute *cxl_region_attrs[] = {
&dev_attr_interleave_granularity.attr,
&dev_attr_resource.attr,
&dev_attr_size.attr,
+ &dev_attr_mode.attr,
NULL,
};
@@ -1075,12 +1113,35 @@ static int cxl_port_setup_targets(struct cxl_port *port,
return rc;
}
- cxld->interleave_ways = iw;
- cxld->interleave_granularity = ig;
- cxld->hpa_range = (struct range) {
- .start = p->res->start,
- .end = p->res->end,
- };
+ if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
+ if (cxld->interleave_ways != iw ||
+ cxld->interleave_granularity != ig ||
+ cxld->hpa_range.start != p->res->start ||
+ cxld->hpa_range.end != p->res->end ||
+ ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)) {
+ dev_err(&cxlr->dev,
+ "%s:%s %s expected iw: %d ig: %d %pr\n",
+ dev_name(port->uport), dev_name(&port->dev),
+ __func__, iw, ig, p->res);
+ dev_err(&cxlr->dev,
+ "%s:%s %s got iw: %d ig: %d state: %s %#llx:%#llx\n",
+ dev_name(port->uport), dev_name(&port->dev),
+ __func__, cxld->interleave_ways,
+ cxld->interleave_granularity,
+ (cxld->flags & CXL_DECODER_F_ENABLE) ?
+ "enabled" :
+ "disabled",
+ cxld->hpa_range.start, cxld->hpa_range.end);
+ return -ENXIO;
+ }
+ } else {
+ cxld->interleave_ways = iw;
+ cxld->interleave_granularity = ig;
+ cxld->hpa_range = (struct range) {
+ .start = p->res->start,
+ .end = p->res->end,
+ };
+ }
dev_dbg(&cxlr->dev, "%s:%s iw: %d ig: %d\n", dev_name(port->uport),
dev_name(&port->dev), iw, ig);
add_target:
@@ -1091,7 +1152,17 @@ add_target:
dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);
return -ENXIO;
}
- cxlsd->target[cxl_rr->nr_targets_set] = ep->dport;
+ if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
+ if (cxlsd->target[cxl_rr->nr_targets_set] != ep->dport) {
+ dev_dbg(&cxlr->dev, "%s:%s: %s expected %s at %d\n",
+ dev_name(port->uport), dev_name(&port->dev),
+ dev_name(&cxlsd->cxld.dev),
+ dev_name(ep->dport->dport),
+ cxl_rr->nr_targets_set);
+ return -ENXIO;
+ }
+ } else
+ cxlsd->target[cxl_rr->nr_targets_set] = ep->dport;
inc = 1;
out_target_set:
cxl_rr->nr_targets_set += inc;
@@ -1133,6 +1204,13 @@ static void cxl_region_teardown_targets(struct cxl_region *cxlr)
struct cxl_ep *ep;
int i;
+ /*
+ * In the auto-discovery case skip automatic teardown since the
+ * address space is already active
+ */
+ if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags))
+ return;
+
for (i = 0; i < p->nr_targets; i++) {
cxled = p->targets[i];
cxlmd = cxled_to_memdev(cxled);
@@ -1165,8 +1243,8 @@ static int cxl_region_setup_targets(struct cxl_region *cxlr)
iter = to_cxl_port(iter->dev.parent);
/*
- * Descend the topology tree programming targets while
- * looking for conflicts.
+ * Descend the topology tree programming / validating
+ * targets while looking for conflicts.
*/
for (ep = cxl_ep_load(iter, cxlmd); iter;
iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
@@ -1181,29 +1259,13 @@ static int cxl_region_setup_targets(struct cxl_region *cxlr)
return 0;
}
-static int cxl_region_attach(struct cxl_region *cxlr,
- struct cxl_endpoint_decoder *cxled, int pos)
+static int cxl_region_validate_position(struct cxl_region *cxlr,
+ struct cxl_endpoint_decoder *cxled,
+ int pos)
{
- struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
- struct cxl_port *ep_port, *root_port, *iter;
struct cxl_region_params *p = &cxlr->params;
- struct cxl_dport *dport;
- int i, rc = -ENXIO;
-
- if (cxled->mode == CXL_DECODER_DEAD) {
- dev_dbg(&cxlr->dev, "%s dead\n", dev_name(&cxled->cxld.dev));
- return -ENODEV;
- }
-
- /* all full of members, or interleave config not established? */
- if (p->state > CXL_CONFIG_INTERLEAVE_ACTIVE) {
- dev_dbg(&cxlr->dev, "region already active\n");
- return -EBUSY;
- } else if (p->state < CXL_CONFIG_INTERLEAVE_ACTIVE) {
- dev_dbg(&cxlr->dev, "interleave config missing\n");
- return -ENXIO;
- }
+ int i;
if (pos < 0 || pos >= p->interleave_ways) {
dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
@@ -1242,6 +1304,256 @@ static int cxl_region_attach(struct cxl_region *cxlr,
}
}
+ return 0;
+}
+
+static int cxl_region_attach_position(struct cxl_region *cxlr,
+ struct cxl_root_decoder *cxlrd,
+ struct cxl_endpoint_decoder *cxled,
+ const struct cxl_dport *dport, int pos)
+{
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct cxl_port *iter;
+ int rc;
+
+ if (cxlrd->calc_hb(cxlrd, pos) != dport) {
+ dev_dbg(&cxlr->dev, "%s:%s invalid target position for %s\n",
+ dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
+ dev_name(&cxlrd->cxlsd.cxld.dev));
+ return -ENXIO;
+ }
+
+ for (iter = cxled_to_port(cxled); !is_cxl_root(iter);
+ iter = to_cxl_port(iter->dev.parent)) {
+ rc = cxl_port_attach_region(iter, cxlr, cxled, pos);
+ if (rc)
+ goto err;
+ }
+
+ return 0;
+
+err:
+ for (iter = cxled_to_port(cxled); !is_cxl_root(iter);
+ iter = to_cxl_port(iter->dev.parent))
+ cxl_port_detach_region(iter, cxlr, cxled);
+ return rc;
+}
+
+static int cxl_region_attach_auto(struct cxl_region *cxlr,
+ struct cxl_endpoint_decoder *cxled, int pos)
+{
+ struct cxl_region_params *p = &cxlr->params;
+
+ if (cxled->state != CXL_DECODER_STATE_AUTO) {
+ dev_err(&cxlr->dev,
+ "%s: unable to add decoder to autodetected region\n",
+ dev_name(&cxled->cxld.dev));
+ return -EINVAL;
+ }
+
+ if (pos >= 0) {
+ dev_dbg(&cxlr->dev, "%s: expected auto position, not %d\n",
+ dev_name(&cxled->cxld.dev), pos);
+ return -EINVAL;
+ }
+
+ if (p->nr_targets >= p->interleave_ways) {
+ dev_err(&cxlr->dev, "%s: no more target slots available\n",
+ dev_name(&cxled->cxld.dev));
+ return -ENXIO;
+ }
+
+ /*
+ * Temporarily record the endpoint decoder into the target array. Yes,
+ * this means that userspace can view devices in the wrong position
+ * before the region activates, and must be careful to understand when
+ * it might be racing region autodiscovery.
+ */
+ pos = p->nr_targets;
+ p->targets[pos] = cxled;
+ cxled->pos = pos;
+ p->nr_targets++;
+
+ return 0;
+}
+
+static struct cxl_port *next_port(struct cxl_port *port)
+{
+ if (!port->parent_dport)
+ return NULL;
+ return port->parent_dport->port;
+}
+
+static int decoder_match_range(struct device *dev, void *data)
+{
+ struct cxl_endpoint_decoder *cxled = data;
+ struct cxl_switch_decoder *cxlsd;
+
+ if (!is_switch_decoder(dev))
+ return 0;
+
+ cxlsd = to_cxl_switch_decoder(dev);
+ return range_contains(&cxlsd->cxld.hpa_range, &cxled->cxld.hpa_range);
+}
+
+static void find_positions(const struct cxl_switch_decoder *cxlsd,
+ const struct cxl_port *iter_a,
+ const struct cxl_port *iter_b, int *a_pos,
+ int *b_pos)
+{
+ int i;
+
+ for (i = 0, *a_pos = -1, *b_pos = -1; i < cxlsd->nr_targets; i++) {
+ if (cxlsd->target[i] == iter_a->parent_dport)
+ *a_pos = i;
+ else if (cxlsd->target[i] == iter_b->parent_dport)
+ *b_pos = i;
+ if (*a_pos >= 0 && *b_pos >= 0)
+ break;
+ }
+}
+
+static int cmp_decode_pos(const void *a, const void *b)
+{
+ struct cxl_endpoint_decoder *cxled_a = *(typeof(cxled_a) *)a;
+ struct cxl_endpoint_decoder *cxled_b = *(typeof(cxled_b) *)b;
+ struct cxl_memdev *cxlmd_a = cxled_to_memdev(cxled_a);
+ struct cxl_memdev *cxlmd_b = cxled_to_memdev(cxled_b);
+ struct cxl_port *port_a = cxled_to_port(cxled_a);
+ struct cxl_port *port_b = cxled_to_port(cxled_b);
+ struct cxl_port *iter_a, *iter_b, *port = NULL;
+ struct cxl_switch_decoder *cxlsd;
+ struct device *dev;
+ int a_pos, b_pos;
+ unsigned int seq;
+
+ /* Exit early if any prior sorting failed */
+ if (cxled_a->pos < 0 || cxled_b->pos < 0)
+ return 0;
+
+ /*
+ * Walk up the hierarchy to find a shared port, find the decoder that
+ * maps the range, compare the relative position of those dport
+ * mappings.
+ */
+ for (iter_a = port_a; iter_a; iter_a = next_port(iter_a)) {
+ struct cxl_port *next_a, *next_b;
+
+ next_a = next_port(iter_a);
+ if (!next_a)
+ break;
+
+ for (iter_b = port_b; iter_b; iter_b = next_port(iter_b)) {
+ next_b = next_port(iter_b);
+ if (next_a != next_b)
+ continue;
+ port = next_a;
+ break;
+ }
+
+ if (port)
+ break;
+ }
+
+ if (!port) {
+ dev_err(cxlmd_a->dev.parent,
+ "failed to find shared port with %s\n",
+ dev_name(cxlmd_b->dev.parent));
+ goto err;
+ }
+
+ dev = device_find_child(&port->dev, cxled_a, decoder_match_range);
+ if (!dev) {
+ struct range *range = &cxled_a->cxld.hpa_range;
+
+ dev_err(port->uport,
+ "failed to find decoder that maps %#llx-%#llx\n",
+ range->start, range->end);
+ goto err;
+ }
+
+ cxlsd = to_cxl_switch_decoder(dev);
+ do {
+ seq = read_seqbegin(&cxlsd->target_lock);
+ find_positions(cxlsd, iter_a, iter_b, &a_pos, &b_pos);
+ } while (read_seqretry(&cxlsd->target_lock, seq));
+
+ put_device(dev);
+
+ if (a_pos < 0 || b_pos < 0) {
+ dev_err(port->uport,
+ "failed to find shared decoder for %s and %s\n",
+ dev_name(cxlmd_a->dev.parent),
+ dev_name(cxlmd_b->dev.parent));
+ goto err;
+ }
+
+ dev_dbg(port->uport, "%s comes %s %s\n", dev_name(cxlmd_a->dev.parent),
+ a_pos - b_pos < 0 ? "before" : "after",
+ dev_name(cxlmd_b->dev.parent));
+
+ return a_pos - b_pos;
+err:
+ cxled_a->pos = -1;
+ return 0;
+}
+
+static int cxl_region_sort_targets(struct cxl_region *cxlr)
+{
+ struct cxl_region_params *p = &cxlr->params;
+ int i, rc = 0;
+
+ sort(p->targets, p->nr_targets, sizeof(p->targets[0]), cmp_decode_pos,
+ NULL);
+
+ for (i = 0; i < p->nr_targets; i++) {
+ struct cxl_endpoint_decoder *cxled = p->targets[i];
+
+ /*
+ * Record that sorting failed, but still continue to restore
+ * cxled->pos with its ->targets[] position so that follow-on
+ * code paths can reliably do p->targets[cxled->pos] to
+ * self-reference their entry.
+ */
+ if (cxled->pos < 0)
+ rc = -ENXIO;
+ cxled->pos = i;
+ }
+
+ dev_dbg(&cxlr->dev, "region sort %s\n", rc ? "failed" : "successful");
+ return rc;
+}
+
+static int cxl_region_attach(struct cxl_region *cxlr,
+ struct cxl_endpoint_decoder *cxled, int pos)
+{
+ struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct cxl_region_params *p = &cxlr->params;
+ struct cxl_port *ep_port, *root_port;
+ struct cxl_dport *dport;
+ int rc = -ENXIO;
+
+ if (cxled->mode != cxlr->mode) {
+ dev_dbg(&cxlr->dev, "%s region mode: %d mismatch: %d\n",
+ dev_name(&cxled->cxld.dev), cxlr->mode, cxled->mode);
+ return -EINVAL;
+ }
+
+ if (cxled->mode == CXL_DECODER_DEAD) {
+ dev_dbg(&cxlr->dev, "%s dead\n", dev_name(&cxled->cxld.dev));
+ return -ENODEV;
+ }
+
+ /* all full of members, or interleave config not established? */
+ if (p->state > CXL_CONFIG_INTERLEAVE_ACTIVE) {
+ dev_dbg(&cxlr->dev, "region already active\n");
+ return -EBUSY;
+ } else if (p->state < CXL_CONFIG_INTERLEAVE_ACTIVE) {
+ dev_dbg(&cxlr->dev, "interleave config missing\n");
+ return -ENXIO;
+ }
+
ep_port = cxled_to_port(cxled);
root_port = cxlrd_to_port(cxlrd);
dport = cxl_find_dport_by_dev(root_port, ep_port->host_bridge);
@@ -1252,13 +1564,6 @@ static int cxl_region_attach(struct cxl_region *cxlr,
return -ENXIO;
}
- if (cxlrd->calc_hb(cxlrd, pos) != dport) {
- dev_dbg(&cxlr->dev, "%s:%s invalid target position for %s\n",
- dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
- dev_name(&cxlrd->cxlsd.cxld.dev));
- return -ENXIO;
- }
-
if (cxled->cxld.target_type != cxlr->type) {
dev_dbg(&cxlr->dev, "%s:%s type mismatch: %d vs %d\n",
dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
@@ -1282,13 +1587,58 @@ static int cxl_region_attach(struct cxl_region *cxlr,
return -EINVAL;
}
- for (iter = ep_port; !is_cxl_root(iter);
- iter = to_cxl_port(iter->dev.parent)) {
- rc = cxl_port_attach_region(iter, cxlr, cxled, pos);
+ if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
+ int i;
+
+ rc = cxl_region_attach_auto(cxlr, cxled, pos);
if (rc)
- goto err;
+ return rc;
+
+ /* await more targets to arrive... */
+ if (p->nr_targets < p->interleave_ways)
+ return 0;
+
+ /*
+ * All targets are here, which implies all PCI enumeration that
+ * affects this region has been completed. Walk the topology to
+ * sort the devices into their relative region decode position.
+ */
+ rc = cxl_region_sort_targets(cxlr);
+ if (rc)
+ return rc;
+
+ for (i = 0; i < p->nr_targets; i++) {
+ cxled = p->targets[i];
+ ep_port = cxled_to_port(cxled);
+ dport = cxl_find_dport_by_dev(root_port,
+ ep_port->host_bridge);
+ rc = cxl_region_attach_position(cxlr, cxlrd, cxled,
+ dport, i);
+ if (rc)
+ return rc;
+ }
+
+ rc = cxl_region_setup_targets(cxlr);
+ if (rc)
+ return rc;
+
+ /*
+ * If target setup succeeds in the autodiscovery case
+ * then the region is already committed.
+ */
+ p->state = CXL_CONFIG_COMMIT;
+
+ return 0;
}
+ rc = cxl_region_validate_position(cxlr, cxled, pos);
+ if (rc)
+ return rc;
+
+ rc = cxl_region_attach_position(cxlr, cxlrd, cxled, dport, pos);
+ if (rc)
+ return rc;
+
p->targets[pos] = cxled;
cxled->pos = pos;
p->nr_targets++;
@@ -1311,10 +1661,8 @@ static int cxl_region_attach(struct cxl_region *cxlr,
err_decrement:
p->nr_targets--;
-err:
- for (iter = ep_port; !is_cxl_root(iter);
- iter = to_cxl_port(iter->dev.parent))
- cxl_port_detach_region(iter, cxlr, cxled);
+ cxled->pos = -1;
+ p->targets[pos] = NULL;
return rc;
}
@@ -1386,31 +1734,25 @@ void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
up_write(&cxl_region_rwsem);
}
-static int attach_target(struct cxl_region *cxlr, const char *decoder, int pos)
+static int attach_target(struct cxl_region *cxlr,
+ struct cxl_endpoint_decoder *cxled, int pos,
+ unsigned int state)
{
- struct device *dev;
- int rc;
-
- dev = bus_find_device_by_name(&cxl_bus_type, NULL, decoder);
- if (!dev)
- return -ENODEV;
-
- if (!is_endpoint_decoder(dev)) {
- put_device(dev);
- return -EINVAL;
- }
+ int rc = 0;
- rc = down_write_killable(&cxl_region_rwsem);
+ if (state == TASK_INTERRUPTIBLE)
+ rc = down_write_killable(&cxl_region_rwsem);
+ else
+ down_write(&cxl_region_rwsem);
if (rc)
- goto out;
+ return rc;
+
down_read(&cxl_dpa_rwsem);
- rc = cxl_region_attach(cxlr, to_cxl_endpoint_decoder(dev), pos);
+ rc = cxl_region_attach(cxlr, cxled, pos);
if (rc == 0)
set_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags);
up_read(&cxl_dpa_rwsem);
up_write(&cxl_region_rwsem);
-out:
- put_device(dev);
return rc;
}
@@ -1448,8 +1790,23 @@ static size_t store_targetN(struct cxl_region *cxlr, const char *buf, int pos,
if (sysfs_streq(buf, "\n"))
rc = detach_target(cxlr, pos);
- else
- rc = attach_target(cxlr, buf, pos);
+ else {
+ struct device *dev;
+
+ dev = bus_find_device_by_name(&cxl_bus_type, NULL, buf);
+ if (!dev)
+ return -ENODEV;
+
+ if (!is_endpoint_decoder(dev)) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ rc = attach_target(cxlr, to_cxl_endpoint_decoder(dev), pos,
+ TASK_INTERRUPTIBLE);
+out:
+ put_device(dev);
+ }
if (rc < 0)
return rc;
@@ -1653,6 +2010,15 @@ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
struct device *dev;
int rc;
+ switch (mode) {
+ case CXL_DECODER_RAM:
+ case CXL_DECODER_PMEM:
+ break;
+ default:
+ dev_err(&cxlrd->cxlsd.cxld.dev, "unsupported mode %d\n", mode);
+ return ERR_PTR(-EINVAL);
+ }
+
cxlr = cxl_region_alloc(cxlrd, id);
if (IS_ERR(cxlr))
return cxlr;
@@ -1681,12 +2047,38 @@ err:
return ERR_PTR(rc);
}
+static ssize_t __create_region_show(struct cxl_root_decoder *cxlrd, char *buf)
+{
+ return sysfs_emit(buf, "region%u\n", atomic_read(&cxlrd->region_id));
+}
+
static ssize_t create_pmem_region_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
+ return __create_region_show(to_cxl_root_decoder(dev), buf);
+}
- return sysfs_emit(buf, "region%u\n", atomic_read(&cxlrd->region_id));
+static ssize_t create_ram_region_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return __create_region_show(to_cxl_root_decoder(dev), buf);
+}
+
+static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd,
+ enum cxl_decoder_mode mode, int id)
+{
+ int rc;
+
+ rc = memregion_alloc(GFP_KERNEL);
+ if (rc < 0)
+ return ERR_PTR(rc);
+
+ if (atomic_cmpxchg(&cxlrd->region_id, id, rc) != id) {
+ memregion_free(rc);
+ return ERR_PTR(-EBUSY);
+ }
+
+ return devm_cxl_add_region(cxlrd, id, mode, CXL_DECODER_EXPANDER);
}
static ssize_t create_pmem_region_store(struct device *dev,
@@ -1695,29 +2087,39 @@ static ssize_t create_pmem_region_store(struct device *dev,
{
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
struct cxl_region *cxlr;
- int id, rc;
+ int rc, id;
rc = sscanf(buf, "region%d\n", &id);
if (rc != 1)
return -EINVAL;
- rc = memregion_alloc(GFP_KERNEL);
- if (rc < 0)
- return rc;
+ cxlr = __create_region(cxlrd, CXL_DECODER_PMEM, id);
+ if (IS_ERR(cxlr))
+ return PTR_ERR(cxlr);
- if (atomic_cmpxchg(&cxlrd->region_id, id, rc) != id) {
- memregion_free(rc);
- return -EBUSY;
- }
+ return len;
+}
+DEVICE_ATTR_RW(create_pmem_region);
+
+static ssize_t create_ram_region_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
+ struct cxl_region *cxlr;
+ int rc, id;
+
+ rc = sscanf(buf, "region%d\n", &id);
+ if (rc != 1)
+ return -EINVAL;
- cxlr = devm_cxl_add_region(cxlrd, id, CXL_DECODER_PMEM,
- CXL_DECODER_EXPANDER);
+ cxlr = __create_region(cxlrd, CXL_DECODER_RAM, id);
if (IS_ERR(cxlr))
return PTR_ERR(cxlr);
return len;
}
-DEVICE_ATTR_RW(create_pmem_region);
+DEVICE_ATTR_RW(create_ram_region);
static ssize_t region_show(struct device *dev, struct device_attribute *attr,
char *buf)
@@ -1878,6 +2280,75 @@ out:
return cxlr_pmem;
}
+static void cxl_dax_region_release(struct device *dev)
+{
+ struct cxl_dax_region *cxlr_dax = to_cxl_dax_region(dev);
+
+ kfree(cxlr_dax);
+}
+
+static const struct attribute_group *cxl_dax_region_attribute_groups[] = {
+ &cxl_base_attribute_group,
+ NULL,
+};
+
+const struct device_type cxl_dax_region_type = {
+ .name = "cxl_dax_region",
+ .release = cxl_dax_region_release,
+ .groups = cxl_dax_region_attribute_groups,
+};
+
+static bool is_cxl_dax_region(struct device *dev)
+{
+ return dev->type == &cxl_dax_region_type;
+}
+
+struct cxl_dax_region *to_cxl_dax_region(struct device *dev)
+{
+ if (dev_WARN_ONCE(dev, !is_cxl_dax_region(dev),
+ "not a cxl_dax_region device\n"))
+ return NULL;
+ return container_of(dev, struct cxl_dax_region, dev);
+}
+EXPORT_SYMBOL_NS_GPL(to_cxl_dax_region, CXL);
+
+static struct lock_class_key cxl_dax_region_key;
+
+static struct cxl_dax_region *cxl_dax_region_alloc(struct cxl_region *cxlr)
+{
+ struct cxl_region_params *p = &cxlr->params;
+ struct cxl_dax_region *cxlr_dax;
+ struct device *dev;
+
+ down_read(&cxl_region_rwsem);
+ if (p->state != CXL_CONFIG_COMMIT) {
+ cxlr_dax = ERR_PTR(-ENXIO);
+ goto out;
+ }
+
+ cxlr_dax = kzalloc(sizeof(*cxlr_dax), GFP_KERNEL);
+ if (!cxlr_dax) {
+ cxlr_dax = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ cxlr_dax->hpa_range.start = p->res->start;
+ cxlr_dax->hpa_range.end = p->res->end;
+
+ dev = &cxlr_dax->dev;
+ cxlr_dax->cxlr = cxlr;
+ device_initialize(dev);
+ lockdep_set_class(&dev->mutex, &cxl_dax_region_key);
+ device_set_pm_not_required(dev);
+ dev->parent = &cxlr->dev;
+ dev->bus = &cxl_bus_type;
+ dev->type = &cxl_dax_region_type;
+out:
+ up_read(&cxl_region_rwsem);
+
+ return cxlr_dax;
+}
+
static void cxlr_pmem_unregister(void *_cxlr_pmem)
{
struct cxl_pmem_region *cxlr_pmem = _cxlr_pmem;
@@ -1962,6 +2433,227 @@ err_bridge:
return rc;
}
+static void cxlr_dax_unregister(void *_cxlr_dax)
+{
+ struct cxl_dax_region *cxlr_dax = _cxlr_dax;
+
+ device_unregister(&cxlr_dax->dev);
+}
+
+static int devm_cxl_add_dax_region(struct cxl_region *cxlr)
+{
+ struct cxl_dax_region *cxlr_dax;
+ struct device *dev;
+ int rc;
+
+ cxlr_dax = cxl_dax_region_alloc(cxlr);
+ if (IS_ERR(cxlr_dax))
+ return PTR_ERR(cxlr_dax);
+
+ dev = &cxlr_dax->dev;
+ rc = dev_set_name(dev, "dax_region%d", cxlr->id);
+ if (rc)
+ goto err;
+
+ rc = device_add(dev);
+ if (rc)
+ goto err;
+
+ dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
+ dev_name(dev));
+
+ return devm_add_action_or_reset(&cxlr->dev, cxlr_dax_unregister,
+ cxlr_dax);
+err:
+ put_device(dev);
+ return rc;
+}
+
+static int match_decoder_by_range(struct device *dev, void *data)
+{
+ struct range *r1, *r2 = data;
+ struct cxl_root_decoder *cxlrd;
+
+ if (!is_root_decoder(dev))
+ return 0;
+
+ cxlrd = to_cxl_root_decoder(dev);
+ r1 = &cxlrd->cxlsd.cxld.hpa_range;
+ return range_contains(r1, r2);
+}
+
+static int match_region_by_range(struct device *dev, void *data)
+{
+ struct cxl_region_params *p;
+ struct cxl_region *cxlr;
+ struct range *r = data;
+ int rc = 0;
+
+ if (!is_cxl_region(dev))
+ return 0;
+
+ cxlr = to_cxl_region(dev);
+ p = &cxlr->params;
+
+ down_read(&cxl_region_rwsem);
+ if (p->res && p->res->start == r->start && p->res->end == r->end)
+ rc = 1;
+ up_read(&cxl_region_rwsem);
+
+ return rc;
+}
+
+/* Establish an empty region covering the given HPA range */
+static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
+ struct cxl_endpoint_decoder *cxled)
+{
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct cxl_port *port = cxlrd_to_port(cxlrd);
+ struct range *hpa = &cxled->cxld.hpa_range;
+ struct cxl_region_params *p;
+ struct cxl_region *cxlr;
+ struct resource *res;
+ int rc;
+
+ do {
+ cxlr = __create_region(cxlrd, cxled->mode,
+ atomic_read(&cxlrd->region_id));
+ } while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY);
+
+ if (IS_ERR(cxlr)) {
+ dev_err(cxlmd->dev.parent,
+ "%s:%s: %s failed assign region: %ld\n",
+ dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
+ __func__, PTR_ERR(cxlr));
+ return cxlr;
+ }
+
+ down_write(&cxl_region_rwsem);
+ p = &cxlr->params;
+ if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
+ dev_err(cxlmd->dev.parent,
+ "%s:%s: %s autodiscovery interrupted\n",
+ dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
+ __func__);
+ rc = -EBUSY;
+ goto err;
+ }
+
+ set_bit(CXL_REGION_F_AUTO, &cxlr->flags);
+
+ res = kmalloc(sizeof(*res), GFP_KERNEL);
+ if (!res) {
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ *res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa),
+ dev_name(&cxlr->dev));
+ rc = insert_resource(cxlrd->res, res);
+ if (rc) {
+ /*
+ * Platform-firmware may not have split resources like "System
+ * RAM" on CXL window boundaries see cxl_region_iomem_release()
+ */
+ dev_warn(cxlmd->dev.parent,
+ "%s:%s: %s %s cannot insert resource\n",
+ dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
+ __func__, dev_name(&cxlr->dev));
+ }
+
+ p->res = res;
+ p->interleave_ways = cxled->cxld.interleave_ways;
+ p->interleave_granularity = cxled->cxld.interleave_granularity;
+ p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;
+
+ rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group());
+ if (rc)
+ goto err;
+
+ dev_dbg(cxlmd->dev.parent, "%s:%s: %s %s res: %pr iw: %d ig: %d\n",
+ dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), __func__,
+ dev_name(&cxlr->dev), p->res, p->interleave_ways,
+ p->interleave_granularity);
+
+ /* ...to match put_device() in cxl_add_to_region() */
+ get_device(&cxlr->dev);
+ up_write(&cxl_region_rwsem);
+
+ return cxlr;
+
+err:
+ up_write(&cxl_region_rwsem);
+ devm_release_action(port->uport, unregister_region, cxlr);
+ return ERR_PTR(rc);
+}
+
+int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled)
+{
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct range *hpa = &cxled->cxld.hpa_range;
+ struct cxl_decoder *cxld = &cxled->cxld;
+ struct device *cxlrd_dev, *region_dev;
+ struct cxl_root_decoder *cxlrd;
+ struct cxl_region_params *p;
+ struct cxl_region *cxlr;
+ bool attach = false;
+ int rc;
+
+ cxlrd_dev = device_find_child(&root->dev, &cxld->hpa_range,
+ match_decoder_by_range);
+ if (!cxlrd_dev) {
+ dev_err(cxlmd->dev.parent,
+ "%s:%s no CXL window for range %#llx:%#llx\n",
+ dev_name(&cxlmd->dev), dev_name(&cxld->dev),
+ cxld->hpa_range.start, cxld->hpa_range.end);
+ return -ENXIO;
+ }
+
+ cxlrd = to_cxl_root_decoder(cxlrd_dev);
+
+ /*
+ * Ensure that if multiple threads race to construct_region() for @hpa
+ * one does the construction and the others add to that.
+ */
+ mutex_lock(&cxlrd->range_lock);
+ region_dev = device_find_child(&cxlrd->cxlsd.cxld.dev, hpa,
+ match_region_by_range);
+ if (!region_dev) {
+ cxlr = construct_region(cxlrd, cxled);
+ region_dev = &cxlr->dev;
+ } else
+ cxlr = to_cxl_region(region_dev);
+ mutex_unlock(&cxlrd->range_lock);
+
+ rc = PTR_ERR_OR_ZERO(cxlr);
+ if (rc)
+ goto out;
+
+ attach_target(cxlr, cxled, -1, TASK_UNINTERRUPTIBLE);
+
+ down_read(&cxl_region_rwsem);
+ p = &cxlr->params;
+ attach = p->state == CXL_CONFIG_COMMIT;
+ up_read(&cxl_region_rwsem);
+
+ if (attach) {
+ /*
+ * If device_attach() fails the range may still be active via
+ * the platform-firmware memory map, otherwise the driver for
+ * regions is local to this file, so driver matching can't fail.
+ */
+ if (device_attach(&cxlr->dev) < 0)
+ dev_err(&cxlr->dev, "failed to enable, range: %pr\n",
+ p->res);
+ }
+
+ put_device(region_dev);
+out:
+ put_device(cxlrd_dev);
+ return rc;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_add_to_region, CXL);
+
static int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
{
if (!test_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags))
@@ -1969,7 +2661,7 @@ static int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
if (!cpu_cache_has_invalidate_memregion()) {
if (IS_ENABLED(CONFIG_CXL_REGION_INVALIDATION_TEST)) {
- dev_warn(
+ dev_warn_once(
&cxlr->dev,
"Bypassing cpu_cache_invalidate_memregion() for testing!\n");
clear_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags);
@@ -1986,6 +2678,15 @@ static int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
return 0;
}
+static int is_system_ram(struct resource *res, void *arg)
+{
+ struct cxl_region *cxlr = arg;
+ struct cxl_region_params *p = &cxlr->params;
+
+ dev_dbg(&cxlr->dev, "%pr has System RAM: %pr\n", p->res, res);
+ return 1;
+}
+
static int cxl_region_probe(struct device *dev)
{
struct cxl_region *cxlr = to_cxl_region(dev);
@@ -2019,6 +2720,17 @@ out:
switch (cxlr->mode) {
case CXL_DECODER_PMEM:
return devm_cxl_add_pmem_region(cxlr);
+ case CXL_DECODER_RAM:
+ /*
+ * The region can not be manged by CXL if any portion of
+ * it is already online as 'System RAM'
+ */
+ if (walk_iomem_res_desc(IORES_DESC_NONE,
+ IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
+ p->res->start, p->res->end, cxlr,
+ is_system_ram) > 0)
+ return 0;
+ return devm_cxl_add_dax_region(cxlr);
default:
dev_dbg(&cxlr->dev, "unsupported region mode: %d\n",
cxlr->mode);
diff --git a/drivers/cxl/core/trace.c b/drivers/cxl/core/trace.c
new file mode 100644
index 000000000000..29ae7ce81dc5
--- /dev/null
+++ b/drivers/cxl/core/trace.c
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
+
+#define CREATE_TRACE_POINTS
+#include "trace.h"
diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h
new file mode 100644
index 000000000000..9b8d3d997834
--- /dev/null
+++ b/drivers/cxl/core/trace.h
@@ -0,0 +1,606 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM cxl
+
+#if !defined(_CXL_EVENTS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _CXL_EVENTS_H
+
+#include <linux/tracepoint.h>
+#include <asm-generic/unaligned.h>
+
+#include <cxl.h>
+#include <cxlmem.h>
+
+#define CXL_RAS_UC_CACHE_DATA_PARITY BIT(0)
+#define CXL_RAS_UC_CACHE_ADDR_PARITY BIT(1)
+#define CXL_RAS_UC_CACHE_BE_PARITY BIT(2)
+#define CXL_RAS_UC_CACHE_DATA_ECC BIT(3)
+#define CXL_RAS_UC_MEM_DATA_PARITY BIT(4)
+#define CXL_RAS_UC_MEM_ADDR_PARITY BIT(5)
+#define CXL_RAS_UC_MEM_BE_PARITY BIT(6)
+#define CXL_RAS_UC_MEM_DATA_ECC BIT(7)
+#define CXL_RAS_UC_REINIT_THRESH BIT(8)
+#define CXL_RAS_UC_RSVD_ENCODE BIT(9)
+#define CXL_RAS_UC_POISON BIT(10)
+#define CXL_RAS_UC_RECV_OVERFLOW BIT(11)
+#define CXL_RAS_UC_INTERNAL_ERR BIT(14)
+#define CXL_RAS_UC_IDE_TX_ERR BIT(15)
+#define CXL_RAS_UC_IDE_RX_ERR BIT(16)
+
+#define show_uc_errs(status) __print_flags(status, " | ", \
+ { CXL_RAS_UC_CACHE_DATA_PARITY, "Cache Data Parity Error" }, \
+ { CXL_RAS_UC_CACHE_ADDR_PARITY, "Cache Address Parity Error" }, \
+ { CXL_RAS_UC_CACHE_BE_PARITY, "Cache Byte Enable Parity Error" }, \
+ { CXL_RAS_UC_CACHE_DATA_ECC, "Cache Data ECC Error" }, \
+ { CXL_RAS_UC_MEM_DATA_PARITY, "Memory Data Parity Error" }, \
+ { CXL_RAS_UC_MEM_ADDR_PARITY, "Memory Address Parity Error" }, \
+ { CXL_RAS_UC_MEM_BE_PARITY, "Memory Byte Enable Parity Error" }, \
+ { CXL_RAS_UC_MEM_DATA_ECC, "Memory Data ECC Error" }, \
+ { CXL_RAS_UC_REINIT_THRESH, "REINIT Threshold Hit" }, \
+ { CXL_RAS_UC_RSVD_ENCODE, "Received Unrecognized Encoding" }, \
+ { CXL_RAS_UC_POISON, "Received Poison From Peer" }, \
+ { CXL_RAS_UC_RECV_OVERFLOW, "Receiver Overflow" }, \
+ { CXL_RAS_UC_INTERNAL_ERR, "Component Specific Error" }, \
+ { CXL_RAS_UC_IDE_TX_ERR, "IDE Tx Error" }, \
+ { CXL_RAS_UC_IDE_RX_ERR, "IDE Rx Error" } \
+)
+
+TRACE_EVENT(cxl_aer_uncorrectable_error,
+ TP_PROTO(const struct cxl_memdev *cxlmd, u32 status, u32 fe, u32 *hl),
+ TP_ARGS(cxlmd, status, fe, hl),
+ TP_STRUCT__entry(
+ __string(memdev, dev_name(&cxlmd->dev))
+ __string(host, dev_name(cxlmd->dev.parent))
+ __field(u64, serial)
+ __field(u32, status)
+ __field(u32, first_error)
+ __array(u32, header_log, CXL_HEADERLOG_SIZE_U32)
+ ),
+ TP_fast_assign(
+ __assign_str(memdev, dev_name(&cxlmd->dev));
+ __assign_str(host, dev_name(cxlmd->dev.parent));
+ __entry->serial = cxlmd->cxlds->serial;
+ __entry->status = status;
+ __entry->first_error = fe;
+ /*
+ * Embed the 512B headerlog data for user app retrieval and
+ * parsing, but no need to print this in the trace buffer.
+ */
+ memcpy(__entry->header_log, hl, CXL_HEADERLOG_SIZE);
+ ),
+ TP_printk("memdev=%s host=%s serial=%lld: status: '%s' first_error: '%s'",
+ __get_str(memdev), __get_str(host), __entry->serial,
+ show_uc_errs(__entry->status),
+ show_uc_errs(__entry->first_error)
+ )
+);
+
+#define CXL_RAS_CE_CACHE_DATA_ECC BIT(0)
+#define CXL_RAS_CE_MEM_DATA_ECC BIT(1)
+#define CXL_RAS_CE_CRC_THRESH BIT(2)
+#define CLX_RAS_CE_RETRY_THRESH BIT(3)
+#define CXL_RAS_CE_CACHE_POISON BIT(4)
+#define CXL_RAS_CE_MEM_POISON BIT(5)
+#define CXL_RAS_CE_PHYS_LAYER_ERR BIT(6)
+
+#define show_ce_errs(status) __print_flags(status, " | ", \
+ { CXL_RAS_CE_CACHE_DATA_ECC, "Cache Data ECC Error" }, \
+ { CXL_RAS_CE_MEM_DATA_ECC, "Memory Data ECC Error" }, \
+ { CXL_RAS_CE_CRC_THRESH, "CRC Threshold Hit" }, \
+ { CLX_RAS_CE_RETRY_THRESH, "Retry Threshold" }, \
+ { CXL_RAS_CE_CACHE_POISON, "Received Cache Poison From Peer" }, \
+ { CXL_RAS_CE_MEM_POISON, "Received Memory Poison From Peer" }, \
+ { CXL_RAS_CE_PHYS_LAYER_ERR, "Received Error From Physical Layer" } \
+)
+
+TRACE_EVENT(cxl_aer_correctable_error,
+ TP_PROTO(const struct cxl_memdev *cxlmd, u32 status),
+ TP_ARGS(cxlmd, status),
+ TP_STRUCT__entry(
+ __string(memdev, dev_name(&cxlmd->dev))
+ __string(host, dev_name(cxlmd->dev.parent))
+ __field(u64, serial)
+ __field(u32, status)
+ ),
+ TP_fast_assign(
+ __assign_str(memdev, dev_name(&cxlmd->dev));
+ __assign_str(host, dev_name(cxlmd->dev.parent));
+ __entry->serial = cxlmd->cxlds->serial;
+ __entry->status = status;
+ ),
+ TP_printk("memdev=%s host=%s serial=%lld: status: '%s'",
+ __get_str(memdev), __get_str(host), __entry->serial,
+ show_ce_errs(__entry->status)
+ )
+);
+
+#define cxl_event_log_type_str(type) \
+ __print_symbolic(type, \
+ { CXL_EVENT_TYPE_INFO, "Informational" }, \
+ { CXL_EVENT_TYPE_WARN, "Warning" }, \
+ { CXL_EVENT_TYPE_FAIL, "Failure" }, \
+ { CXL_EVENT_TYPE_FATAL, "Fatal" })
+
+TRACE_EVENT(cxl_overflow,
+
+ TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log,
+ struct cxl_get_event_payload *payload),
+
+ TP_ARGS(cxlmd, log, payload),
+
+ TP_STRUCT__entry(
+ __string(memdev, dev_name(&cxlmd->dev))
+ __string(host, dev_name(cxlmd->dev.parent))
+ __field(int, log)
+ __field(u64, serial)
+ __field(u64, first_ts)
+ __field(u64, last_ts)
+ __field(u16, count)
+ ),
+
+ TP_fast_assign(
+ __assign_str(memdev, dev_name(&cxlmd->dev));
+ __assign_str(host, dev_name(cxlmd->dev.parent));
+ __entry->serial = cxlmd->cxlds->serial;
+ __entry->log = log;
+ __entry->count = le16_to_cpu(payload->overflow_err_count);
+ __entry->first_ts = le64_to_cpu(payload->first_overflow_timestamp);
+ __entry->last_ts = le64_to_cpu(payload->last_overflow_timestamp);
+ ),
+
+ TP_printk("memdev=%s host=%s serial=%lld: log=%s : %u records from %llu to %llu",
+ __get_str(memdev), __get_str(host), __entry->serial,
+ cxl_event_log_type_str(__entry->log), __entry->count,
+ __entry->first_ts, __entry->last_ts)
+
+);
+
+/*
+ * Common Event Record Format
+ * CXL 3.0 section 8.2.9.2.1; Table 8-42
+ */
+#define CXL_EVENT_RECORD_FLAG_PERMANENT BIT(2)
+#define CXL_EVENT_RECORD_FLAG_MAINT_NEEDED BIT(3)
+#define CXL_EVENT_RECORD_FLAG_PERF_DEGRADED BIT(4)
+#define CXL_EVENT_RECORD_FLAG_HW_REPLACE BIT(5)
+#define show_hdr_flags(flags) __print_flags(flags, " | ", \
+ { CXL_EVENT_RECORD_FLAG_PERMANENT, "PERMANENT_CONDITION" }, \
+ { CXL_EVENT_RECORD_FLAG_MAINT_NEEDED, "MAINTENANCE_NEEDED" }, \
+ { CXL_EVENT_RECORD_FLAG_PERF_DEGRADED, "PERFORMANCE_DEGRADED" }, \
+ { CXL_EVENT_RECORD_FLAG_HW_REPLACE, "HARDWARE_REPLACEMENT_NEEDED" } \
+)
+
+/*
+ * Define macros for the common header of each CXL event.
+ *
+ * Tracepoints using these macros must do 3 things:
+ *
+ * 1) Add CXL_EVT_TP_entry to TP_STRUCT__entry
+ * 2) Use CXL_EVT_TP_fast_assign within TP_fast_assign;
+ * pass the dev, log, and CXL event header
+ * 3) Use CXL_EVT_TP_printk() instead of TP_printk()
+ *
+ * See the generic_event tracepoint as an example.
+ */
+#define CXL_EVT_TP_entry \
+ __string(memdev, dev_name(&cxlmd->dev)) \
+ __string(host, dev_name(cxlmd->dev.parent)) \
+ __field(int, log) \
+ __field_struct(uuid_t, hdr_uuid) \
+ __field(u64, serial) \
+ __field(u32, hdr_flags) \
+ __field(u16, hdr_handle) \
+ __field(u16, hdr_related_handle) \
+ __field(u64, hdr_timestamp) \
+ __field(u8, hdr_length) \
+ __field(u8, hdr_maint_op_class)
+
+#define CXL_EVT_TP_fast_assign(cxlmd, l, hdr) \
+ __assign_str(memdev, dev_name(&(cxlmd)->dev)); \
+ __assign_str(host, dev_name((cxlmd)->dev.parent)); \
+ __entry->log = (l); \
+ __entry->serial = (cxlmd)->cxlds->serial; \
+ memcpy(&__entry->hdr_uuid, &(hdr).id, sizeof(uuid_t)); \
+ __entry->hdr_length = (hdr).length; \
+ __entry->hdr_flags = get_unaligned_le24((hdr).flags); \
+ __entry->hdr_handle = le16_to_cpu((hdr).handle); \
+ __entry->hdr_related_handle = le16_to_cpu((hdr).related_handle); \
+ __entry->hdr_timestamp = le64_to_cpu((hdr).timestamp); \
+ __entry->hdr_maint_op_class = (hdr).maint_op_class
+
+#define CXL_EVT_TP_printk(fmt, ...) \
+ TP_printk("memdev=%s host=%s serial=%lld log=%s : time=%llu uuid=%pUb " \
+ "len=%d flags='%s' handle=%x related_handle=%x " \
+ "maint_op_class=%u : " fmt, \
+ __get_str(memdev), __get_str(host), __entry->serial, \
+ cxl_event_log_type_str(__entry->log), \
+ __entry->hdr_timestamp, &__entry->hdr_uuid, __entry->hdr_length,\
+ show_hdr_flags(__entry->hdr_flags), __entry->hdr_handle, \
+ __entry->hdr_related_handle, __entry->hdr_maint_op_class, \
+ ##__VA_ARGS__)
+
+TRACE_EVENT(cxl_generic_event,
+
+ TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log,
+ struct cxl_event_record_raw *rec),
+
+ TP_ARGS(cxlmd, log, rec),
+
+ TP_STRUCT__entry(
+ CXL_EVT_TP_entry
+ __array(u8, data, CXL_EVENT_RECORD_DATA_LENGTH)
+ ),
+
+ TP_fast_assign(
+ CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr);
+ memcpy(__entry->data, &rec->data, CXL_EVENT_RECORD_DATA_LENGTH);
+ ),
+
+ CXL_EVT_TP_printk("%s",
+ __print_hex(__entry->data, CXL_EVENT_RECORD_DATA_LENGTH))
+);
+
+/*
+ * Physical Address field masks
+ *
+ * General Media Event Record
+ * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
+ *
+ * DRAM Event Record
+ * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44
+ */
+#define CXL_DPA_FLAGS_MASK 0x3F
+#define CXL_DPA_MASK (~CXL_DPA_FLAGS_MASK)
+
+#define CXL_DPA_VOLATILE BIT(0)
+#define CXL_DPA_NOT_REPAIRABLE BIT(1)
+#define show_dpa_flags(flags) __print_flags(flags, "|", \
+ { CXL_DPA_VOLATILE, "VOLATILE" }, \
+ { CXL_DPA_NOT_REPAIRABLE, "NOT_REPAIRABLE" } \
+)
+
+/*
+ * General Media Event Record - GMER
+ * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
+ */
+#define CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT BIT(0)
+#define CXL_GMER_EVT_DESC_THRESHOLD_EVENT BIT(1)
+#define CXL_GMER_EVT_DESC_POISON_LIST_OVERFLOW BIT(2)
+#define show_event_desc_flags(flags) __print_flags(flags, "|", \
+ { CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT, "UNCORRECTABLE_EVENT" }, \
+ { CXL_GMER_EVT_DESC_THRESHOLD_EVENT, "THRESHOLD_EVENT" }, \
+ { CXL_GMER_EVT_DESC_POISON_LIST_OVERFLOW, "POISON_LIST_OVERFLOW" } \
+)
+
+#define CXL_GMER_MEM_EVT_TYPE_ECC_ERROR 0x00
+#define CXL_GMER_MEM_EVT_TYPE_INV_ADDR 0x01
+#define CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR 0x02
+#define show_mem_event_type(type) __print_symbolic(type, \
+ { CXL_GMER_MEM_EVT_TYPE_ECC_ERROR, "ECC Error" }, \
+ { CXL_GMER_MEM_EVT_TYPE_INV_ADDR, "Invalid Address" }, \
+ { CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR, "Data Path Error" } \
+)
+
+#define CXL_GMER_TRANS_UNKNOWN 0x00
+#define CXL_GMER_TRANS_HOST_READ 0x01
+#define CXL_GMER_TRANS_HOST_WRITE 0x02
+#define CXL_GMER_TRANS_HOST_SCAN_MEDIA 0x03
+#define CXL_GMER_TRANS_HOST_INJECT_POISON 0x04
+#define CXL_GMER_TRANS_INTERNAL_MEDIA_SCRUB 0x05
+#define CXL_GMER_TRANS_INTERNAL_MEDIA_MANAGEMENT 0x06
+#define show_trans_type(type) __print_symbolic(type, \
+ { CXL_GMER_TRANS_UNKNOWN, "Unknown" }, \
+ { CXL_GMER_TRANS_HOST_READ, "Host Read" }, \
+ { CXL_GMER_TRANS_HOST_WRITE, "Host Write" }, \
+ { CXL_GMER_TRANS_HOST_SCAN_MEDIA, "Host Scan Media" }, \
+ { CXL_GMER_TRANS_HOST_INJECT_POISON, "Host Inject Poison" }, \
+ { CXL_GMER_TRANS_INTERNAL_MEDIA_SCRUB, "Internal Media Scrub" }, \
+ { CXL_GMER_TRANS_INTERNAL_MEDIA_MANAGEMENT, "Internal Media Management" } \
+)
+
+#define CXL_GMER_VALID_CHANNEL BIT(0)
+#define CXL_GMER_VALID_RANK BIT(1)
+#define CXL_GMER_VALID_DEVICE BIT(2)
+#define CXL_GMER_VALID_COMPONENT BIT(3)
+#define show_valid_flags(flags) __print_flags(flags, "|", \
+ { CXL_GMER_VALID_CHANNEL, "CHANNEL" }, \
+ { CXL_GMER_VALID_RANK, "RANK" }, \
+ { CXL_GMER_VALID_DEVICE, "DEVICE" }, \
+ { CXL_GMER_VALID_COMPONENT, "COMPONENT" } \
+)
+
+TRACE_EVENT(cxl_general_media,
+
+ TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log,
+ struct cxl_event_gen_media *rec),
+
+ TP_ARGS(cxlmd, log, rec),
+
+ TP_STRUCT__entry(
+ CXL_EVT_TP_entry
+ /* General Media */
+ __field(u64, dpa)
+ __field(u8, descriptor)
+ __field(u8, type)
+ __field(u8, transaction_type)
+ __field(u8, channel)
+ __field(u32, device)
+ __array(u8, comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE)
+ __field(u16, validity_flags)
+ /* Following are out of order to pack trace record */
+ __field(u8, rank)
+ __field(u8, dpa_flags)
+ ),
+
+ TP_fast_assign(
+ CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr);
+
+ /* General Media */
+ __entry->dpa = le64_to_cpu(rec->phys_addr);
+ __entry->dpa_flags = __entry->dpa & CXL_DPA_FLAGS_MASK;
+ /* Mask after flags have been parsed */
+ __entry->dpa &= CXL_DPA_MASK;
+ __entry->descriptor = rec->descriptor;
+ __entry->type = rec->type;
+ __entry->transaction_type = rec->transaction_type;
+ __entry->channel = rec->channel;
+ __entry->rank = rec->rank;
+ __entry->device = get_unaligned_le24(rec->device);
+ memcpy(__entry->comp_id, &rec->component_id,
+ CXL_EVENT_GEN_MED_COMP_ID_SIZE);
+ __entry->validity_flags = get_unaligned_le16(&rec->validity_flags);
+ ),
+
+ CXL_EVT_TP_printk("dpa=%llx dpa_flags='%s' " \
+ "descriptor='%s' type='%s' transaction_type='%s' channel=%u rank=%u " \
+ "device=%x comp_id=%s validity_flags='%s'",
+ __entry->dpa, show_dpa_flags(__entry->dpa_flags),
+ show_event_desc_flags(__entry->descriptor),
+ show_mem_event_type(__entry->type),
+ show_trans_type(__entry->transaction_type),
+ __entry->channel, __entry->rank, __entry->device,
+ __print_hex(__entry->comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE),
+ show_valid_flags(__entry->validity_flags)
+ )
+);
+
+/*
+ * DRAM Event Record - DER
+ *
+ * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44
+ */
+/*
+ * DRAM Event Record defines many fields the same as the General Media Event
+ * Record. Reuse those definitions as appropriate.
+ */
+#define CXL_DER_VALID_CHANNEL BIT(0)
+#define CXL_DER_VALID_RANK BIT(1)
+#define CXL_DER_VALID_NIBBLE BIT(2)
+#define CXL_DER_VALID_BANK_GROUP BIT(3)
+#define CXL_DER_VALID_BANK BIT(4)
+#define CXL_DER_VALID_ROW BIT(5)
+#define CXL_DER_VALID_COLUMN BIT(6)
+#define CXL_DER_VALID_CORRECTION_MASK BIT(7)
+#define show_dram_valid_flags(flags) __print_flags(flags, "|", \
+ { CXL_DER_VALID_CHANNEL, "CHANNEL" }, \
+ { CXL_DER_VALID_RANK, "RANK" }, \
+ { CXL_DER_VALID_NIBBLE, "NIBBLE" }, \
+ { CXL_DER_VALID_BANK_GROUP, "BANK GROUP" }, \
+ { CXL_DER_VALID_BANK, "BANK" }, \
+ { CXL_DER_VALID_ROW, "ROW" }, \
+ { CXL_DER_VALID_COLUMN, "COLUMN" }, \
+ { CXL_DER_VALID_CORRECTION_MASK, "CORRECTION MASK" } \
+)
+
+TRACE_EVENT(cxl_dram,
+
+ TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log,
+ struct cxl_event_dram *rec),
+
+ TP_ARGS(cxlmd, log, rec),
+
+ TP_STRUCT__entry(
+ CXL_EVT_TP_entry
+ /* DRAM */
+ __field(u64, dpa)
+ __field(u8, descriptor)
+ __field(u8, type)
+ __field(u8, transaction_type)
+ __field(u8, channel)
+ __field(u16, validity_flags)
+ __field(u16, column) /* Out of order to pack trace record */
+ __field(u32, nibble_mask)
+ __field(u32, row)
+ __array(u8, cor_mask, CXL_EVENT_DER_CORRECTION_MASK_SIZE)
+ __field(u8, rank) /* Out of order to pack trace record */
+ __field(u8, bank_group) /* Out of order to pack trace record */
+ __field(u8, bank) /* Out of order to pack trace record */
+ __field(u8, dpa_flags) /* Out of order to pack trace record */
+ ),
+
+ TP_fast_assign(
+ CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr);
+
+ /* DRAM */
+ __entry->dpa = le64_to_cpu(rec->phys_addr);
+ __entry->dpa_flags = __entry->dpa & CXL_DPA_FLAGS_MASK;
+ __entry->dpa &= CXL_DPA_MASK;
+ __entry->descriptor = rec->descriptor;
+ __entry->type = rec->type;
+ __entry->transaction_type = rec->transaction_type;
+ __entry->validity_flags = get_unaligned_le16(rec->validity_flags);
+ __entry->channel = rec->channel;
+ __entry->rank = rec->rank;
+ __entry->nibble_mask = get_unaligned_le24(rec->nibble_mask);
+ __entry->bank_group = rec->bank_group;
+ __entry->bank = rec->bank;
+ __entry->row = get_unaligned_le24(rec->row);
+ __entry->column = get_unaligned_le16(rec->column);
+ memcpy(__entry->cor_mask, &rec->correction_mask,
+ CXL_EVENT_DER_CORRECTION_MASK_SIZE);
+ ),
+
+ CXL_EVT_TP_printk("dpa=%llx dpa_flags='%s' descriptor='%s' type='%s' " \
+ "transaction_type='%s' channel=%u rank=%u nibble_mask=%x " \
+ "bank_group=%u bank=%u row=%u column=%u cor_mask=%s " \
+ "validity_flags='%s'",
+ __entry->dpa, show_dpa_flags(__entry->dpa_flags),
+ show_event_desc_flags(__entry->descriptor),
+ show_mem_event_type(__entry->type),
+ show_trans_type(__entry->transaction_type),
+ __entry->channel, __entry->rank, __entry->nibble_mask,
+ __entry->bank_group, __entry->bank,
+ __entry->row, __entry->column,
+ __print_hex(__entry->cor_mask, CXL_EVENT_DER_CORRECTION_MASK_SIZE),
+ show_dram_valid_flags(__entry->validity_flags)
+ )
+);
+
+/*
+ * Memory Module Event Record - MMER
+ *
+ * CXL res 3.0 section 8.2.9.2.1.3; Table 8-45
+ */
+#define CXL_MMER_HEALTH_STATUS_CHANGE 0x00
+#define CXL_MMER_MEDIA_STATUS_CHANGE 0x01
+#define CXL_MMER_LIFE_USED_CHANGE 0x02
+#define CXL_MMER_TEMP_CHANGE 0x03
+#define CXL_MMER_DATA_PATH_ERROR 0x04
+#define CXL_MMER_LSA_ERROR 0x05
+#define show_dev_evt_type(type) __print_symbolic(type, \
+ { CXL_MMER_HEALTH_STATUS_CHANGE, "Health Status Change" }, \
+ { CXL_MMER_MEDIA_STATUS_CHANGE, "Media Status Change" }, \
+ { CXL_MMER_LIFE_USED_CHANGE, "Life Used Change" }, \
+ { CXL_MMER_TEMP_CHANGE, "Temperature Change" }, \
+ { CXL_MMER_DATA_PATH_ERROR, "Data Path Error" }, \
+ { CXL_MMER_LSA_ERROR, "LSA Error" } \
+)
+
+/*
+ * Device Health Information - DHI
+ *
+ * CXL res 3.0 section 8.2.9.8.3.1; Table 8-100
+ */
+#define CXL_DHI_HS_MAINTENANCE_NEEDED BIT(0)
+#define CXL_DHI_HS_PERFORMANCE_DEGRADED BIT(1)
+#define CXL_DHI_HS_HW_REPLACEMENT_NEEDED BIT(2)
+#define show_health_status_flags(flags) __print_flags(flags, "|", \
+ { CXL_DHI_HS_MAINTENANCE_NEEDED, "MAINTENANCE_NEEDED" }, \
+ { CXL_DHI_HS_PERFORMANCE_DEGRADED, "PERFORMANCE_DEGRADED" }, \
+ { CXL_DHI_HS_HW_REPLACEMENT_NEEDED, "REPLACEMENT_NEEDED" } \
+)
+
+#define CXL_DHI_MS_NORMAL 0x00
+#define CXL_DHI_MS_NOT_READY 0x01
+#define CXL_DHI_MS_WRITE_PERSISTENCY_LOST 0x02
+#define CXL_DHI_MS_ALL_DATA_LOST 0x03
+#define CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_EVENT_POWER_LOSS 0x04
+#define CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_EVENT_SHUTDOWN 0x05
+#define CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_IMMINENT 0x06
+#define CXL_DHI_MS_WRITE_ALL_DATA_LOSS_EVENT_POWER_LOSS 0x07
+#define CXL_DHI_MS_WRITE_ALL_DATA_LOSS_EVENT_SHUTDOWN 0x08
+#define CXL_DHI_MS_WRITE_ALL_DATA_LOSS_IMMINENT 0x09
+#define show_media_status(ms) __print_symbolic(ms, \
+ { CXL_DHI_MS_NORMAL, \
+ "Normal" }, \
+ { CXL_DHI_MS_NOT_READY, \
+ "Not Ready" }, \
+ { CXL_DHI_MS_WRITE_PERSISTENCY_LOST, \
+ "Write Persistency Lost" }, \
+ { CXL_DHI_MS_ALL_DATA_LOST, \
+ "All Data Lost" }, \
+ { CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_EVENT_POWER_LOSS, \
+ "Write Persistency Loss in the Event of Power Loss" }, \
+ { CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_EVENT_SHUTDOWN, \
+ "Write Persistency Loss in Event of Shutdown" }, \
+ { CXL_DHI_MS_WRITE_PERSISTENCY_LOSS_IMMINENT, \
+ "Write Persistency Loss Imminent" }, \
+ { CXL_DHI_MS_WRITE_ALL_DATA_LOSS_EVENT_POWER_LOSS, \
+ "All Data Loss in Event of Power Loss" }, \
+ { CXL_DHI_MS_WRITE_ALL_DATA_LOSS_EVENT_SHUTDOWN, \
+ "All Data loss in the Event of Shutdown" }, \
+ { CXL_DHI_MS_WRITE_ALL_DATA_LOSS_IMMINENT, \
+ "All Data Loss Imminent" } \
+)
+
+#define CXL_DHI_AS_NORMAL 0x0
+#define CXL_DHI_AS_WARNING 0x1
+#define CXL_DHI_AS_CRITICAL 0x2
+#define show_two_bit_status(as) __print_symbolic(as, \
+ { CXL_DHI_AS_NORMAL, "Normal" }, \
+ { CXL_DHI_AS_WARNING, "Warning" }, \
+ { CXL_DHI_AS_CRITICAL, "Critical" } \
+)
+#define show_one_bit_status(as) __print_symbolic(as, \
+ { CXL_DHI_AS_NORMAL, "Normal" }, \
+ { CXL_DHI_AS_WARNING, "Warning" } \
+)
+
+#define CXL_DHI_AS_LIFE_USED(as) (as & 0x3)
+#define CXL_DHI_AS_DEV_TEMP(as) ((as & 0xC) >> 2)
+#define CXL_DHI_AS_COR_VOL_ERR_CNT(as) ((as & 0x10) >> 4)
+#define CXL_DHI_AS_COR_PER_ERR_CNT(as) ((as & 0x20) >> 5)
+
+TRACE_EVENT(cxl_memory_module,
+
+ TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log,
+ struct cxl_event_mem_module *rec),
+
+ TP_ARGS(cxlmd, log, rec),
+
+ TP_STRUCT__entry(
+ CXL_EVT_TP_entry
+
+ /* Memory Module Event */
+ __field(u8, event_type)
+
+ /* Device Health Info */
+ __field(u8, health_status)
+ __field(u8, media_status)
+ __field(u8, life_used)
+ __field(u32, dirty_shutdown_cnt)
+ __field(u32, cor_vol_err_cnt)
+ __field(u32, cor_per_err_cnt)
+ __field(s16, device_temp)
+ __field(u8, add_status)
+ ),
+
+ TP_fast_assign(
+ CXL_EVT_TP_fast_assign(cxlmd, log, rec->hdr);
+
+ /* Memory Module Event */
+ __entry->event_type = rec->event_type;
+
+ /* Device Health Info */
+ __entry->health_status = rec->info.health_status;
+ __entry->media_status = rec->info.media_status;
+ __entry->life_used = rec->info.life_used;
+ __entry->dirty_shutdown_cnt = get_unaligned_le32(rec->info.dirty_shutdown_cnt);
+ __entry->cor_vol_err_cnt = get_unaligned_le32(rec->info.cor_vol_err_cnt);
+ __entry->cor_per_err_cnt = get_unaligned_le32(rec->info.cor_per_err_cnt);
+ __entry->device_temp = get_unaligned_le16(rec->info.device_temp);
+ __entry->add_status = rec->info.add_status;
+ ),
+
+ CXL_EVT_TP_printk("event_type='%s' health_status='%s' media_status='%s' " \
+ "as_life_used=%s as_dev_temp=%s as_cor_vol_err_cnt=%s " \
+ "as_cor_per_err_cnt=%s life_used=%u device_temp=%d " \
+ "dirty_shutdown_cnt=%u cor_vol_err_cnt=%u cor_per_err_cnt=%u",
+ show_dev_evt_type(__entry->event_type),
+ show_health_status_flags(__entry->health_status),
+ show_media_status(__entry->media_status),
+ show_two_bit_status(CXL_DHI_AS_LIFE_USED(__entry->add_status)),
+ show_two_bit_status(CXL_DHI_AS_DEV_TEMP(__entry->add_status)),
+ show_one_bit_status(CXL_DHI_AS_COR_VOL_ERR_CNT(__entry->add_status)),
+ show_one_bit_status(CXL_DHI_AS_COR_PER_ERR_CNT(__entry->add_status)),
+ __entry->life_used, __entry->device_temp,
+ __entry->dirty_shutdown_cnt, __entry->cor_vol_err_cnt,
+ __entry->cor_per_err_cnt
+ )
+);
+
+#endif /* _CXL_EVENTS_H */
+
+#define TRACE_INCLUDE_FILE trace
+#include <trace/define_trace.h>
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index c9e1b48a1a53..f2b0962a552d 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -130,6 +130,7 @@ static inline int ways_to_eiw(unsigned int ways, u8 *eiw)
#define CXL_RAS_UNCORRECTABLE_STATUS_MASK (GENMASK(16, 14) | GENMASK(11, 0))
#define CXL_RAS_UNCORRECTABLE_MASK_OFFSET 0x4
#define CXL_RAS_UNCORRECTABLE_MASK_MASK (GENMASK(16, 14) | GENMASK(11, 0))
+#define CXL_RAS_UNCORRECTABLE_MASK_F256B_MASK BIT(8)
#define CXL_RAS_UNCORRECTABLE_SEVERITY_OFFSET 0x8
#define CXL_RAS_UNCORRECTABLE_SEVERITY_MASK (GENMASK(16, 14) | GENMASK(11, 0))
#define CXL_RAS_CORRECTABLE_STATUS_OFFSET 0xC
@@ -140,6 +141,8 @@ static inline int ways_to_eiw(unsigned int ways, u8 *eiw)
#define CXL_RAS_CAP_CONTROL_FE_MASK GENMASK(5, 0)
#define CXL_RAS_HEADER_LOG_OFFSET 0x18
#define CXL_RAS_CAPABILITY_LENGTH 0x58
+#define CXL_HEADERLOG_SIZE SZ_512
+#define CXL_HEADERLOG_SIZE_U32 SZ_512 / sizeof(u32)
/* CXL 2.0 8.2.8.1 Device Capabilities Array Register */
#define CXLDEV_CAP_ARRAY_OFFSET 0x0
@@ -154,6 +157,22 @@ static inline int ways_to_eiw(unsigned int ways, u8 *eiw)
#define CXLDEV_CAP_CAP_ID_SECONDARY_MAILBOX 0x3
#define CXLDEV_CAP_CAP_ID_MEMDEV 0x4000
+/* CXL 3.0 8.2.8.3.1 Event Status Register */
+#define CXLDEV_DEV_EVENT_STATUS_OFFSET 0x00
+#define CXLDEV_EVENT_STATUS_INFO BIT(0)
+#define CXLDEV_EVENT_STATUS_WARN BIT(1)
+#define CXLDEV_EVENT_STATUS_FAIL BIT(2)
+#define CXLDEV_EVENT_STATUS_FATAL BIT(3)
+
+#define CXLDEV_EVENT_STATUS_ALL (CXLDEV_EVENT_STATUS_INFO | \
+ CXLDEV_EVENT_STATUS_WARN | \
+ CXLDEV_EVENT_STATUS_FAIL | \
+ CXLDEV_EVENT_STATUS_FATAL)
+
+/* CXL rev 3.0 section 8.2.9.2.4; Table 8-52 */
+#define CXLDEV_EVENT_INT_MODE_MASK GENMASK(1, 0)
+#define CXLDEV_EVENT_INT_MSGNUM_MASK GENMASK(7, 4)
+
/* CXL 2.0 8.2.8.4 Mailbox Registers */
#define CXLDEV_MBOX_CAPS_OFFSET 0x00
#define CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK GENMASK(4, 0)
@@ -259,6 +278,8 @@ resource_size_t cxl_rcrb_to_component(struct device *dev,
* cxl_decoder flags that define the type of memory / devices this
* decoder supports as well as configuration lock status See "CXL 2.0
* 8.2.5.12.7 CXL HDM Decoder 0 Control Register" for details.
+ * Additionally indicate whether decoder settings were autodetected,
+ * user customized.
*/
#define CXL_DECODER_F_RAM BIT(0)
#define CXL_DECODER_F_PMEM BIT(1)
@@ -318,12 +339,36 @@ enum cxl_decoder_mode {
CXL_DECODER_DEAD,
};
+static inline const char *cxl_decoder_mode_name(enum cxl_decoder_mode mode)
+{
+ static const char * const names[] = {
+ [CXL_DECODER_NONE] = "none",
+ [CXL_DECODER_RAM] = "ram",
+ [CXL_DECODER_PMEM] = "pmem",
+ [CXL_DECODER_MIXED] = "mixed",
+ };
+
+ if (mode >= CXL_DECODER_NONE && mode <= CXL_DECODER_MIXED)
+ return names[mode];
+ return "mixed";
+}
+
+/*
+ * Track whether this decoder is reserved for region autodiscovery, or
+ * free for userspace provisioning.
+ */
+enum cxl_decoder_state {
+ CXL_DECODER_STATE_MANUAL,
+ CXL_DECODER_STATE_AUTO,
+};
+
/**
* struct cxl_endpoint_decoder - Endpoint / SPA to DPA decoder
* @cxld: base cxl_decoder_object
* @dpa_res: actively claimed DPA span of this decoder
* @skip: offset into @dpa_res where @cxld.hpa_range maps
* @mode: which memory type / access-mode-partition this decoder targets
+ * @state: autodiscovery state
* @pos: interleave position in @cxld.region
*/
struct cxl_endpoint_decoder {
@@ -331,6 +376,7 @@ struct cxl_endpoint_decoder {
struct resource *dpa_res;
resource_size_t skip;
enum cxl_decoder_mode mode;
+ enum cxl_decoder_state state;
int pos;
};
@@ -364,6 +410,7 @@ typedef struct cxl_dport *(*cxl_calc_hb_fn)(struct cxl_root_decoder *cxlrd,
* @region_id: region id for next region provisioning event
* @calc_hb: which host bridge covers the n'th position by granularity
* @platform_data: platform specific configuration data
+ * @range_lock: sync region autodiscovery by address range
* @cxlsd: base cxl switch decoder
*/
struct cxl_root_decoder {
@@ -371,6 +418,7 @@ struct cxl_root_decoder {
atomic_t region_id;
cxl_calc_hb_fn calc_hb;
void *platform_data;
+ struct mutex range_lock;
struct cxl_switch_decoder cxlsd;
};
@@ -420,6 +468,13 @@ struct cxl_region_params {
*/
#define CXL_REGION_F_INCOHERENT 0
+/*
+ * Indicate whether this region has been assembled by autodetection or
+ * userspace assembly. Prevent endpoint decoders outside of automatic
+ * detection from being added to the region.
+ */
+#define CXL_REGION_F_AUTO 1
+
/**
* struct cxl_region - CXL region
* @dev: This region's device
@@ -475,6 +530,12 @@ struct cxl_pmem_region {
struct cxl_pmem_region_mapping mapping[];
};
+struct cxl_dax_region {
+ struct device dev;
+ struct cxl_region *cxlr;
+ struct range hpa_range;
+};
+
/**
* struct cxl_port - logical collection of upstream port devices and
* downstream port devices to construct a CXL memory
@@ -615,8 +676,10 @@ struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port,
struct cxl_decoder *to_cxl_decoder(struct device *dev);
struct cxl_root_decoder *to_cxl_root_decoder(struct device *dev);
+struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev);
struct cxl_endpoint_decoder *to_cxl_endpoint_decoder(struct device *dev);
bool is_root_decoder(struct device *dev);
+bool is_switch_decoder(struct device *dev);
bool is_endpoint_decoder(struct device *dev);
struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port,
unsigned int nr_targets,
@@ -630,10 +693,26 @@ int cxl_decoder_add_locked(struct cxl_decoder *cxld, int *target_map);
int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld);
int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint);
+/**
+ * struct cxl_endpoint_dvsec_info - Cached DVSEC info
+ * @mem_enabled: cached value of mem_enabled in the DVSEC, PCIE_DEVICE
+ * @ranges: Number of active HDM ranges this device uses.
+ * @dvsec_range: cached attributes of the ranges in the DVSEC, PCIE_DEVICE
+ */
+struct cxl_endpoint_dvsec_info {
+ bool mem_enabled;
+ int ranges;
+ struct range dvsec_range[2];
+};
+
struct cxl_hdm;
-struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port);
-int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm);
+struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port,
+ struct cxl_endpoint_dvsec_info *info);
+int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm,
+ struct cxl_endpoint_dvsec_info *info);
int devm_cxl_add_passthrough_decoder(struct cxl_port *port);
+int cxl_dvsec_rr_decode(struct device *dev, int dvsec,
+ struct cxl_endpoint_dvsec_info *info);
bool is_cxl_region(struct device *dev);
@@ -667,6 +746,7 @@ void cxl_driver_unregister(struct cxl_driver *cxl_drv);
#define CXL_DEVICE_MEMORY_EXPANDER 5
#define CXL_DEVICE_REGION 6
#define CXL_DEVICE_PMEM_REGION 7
+#define CXL_DEVICE_DAX_REGION 8
#define MODULE_ALIAS_CXL(type) MODULE_ALIAS("cxl:t" __stringify(type) "*")
#define CXL_MODALIAS_FMT "cxl:t%d"
@@ -683,6 +763,9 @@ struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct device *dev);
#ifdef CONFIG_CXL_REGION
bool is_cxl_pmem_region(struct device *dev);
struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev);
+int cxl_add_to_region(struct cxl_port *root,
+ struct cxl_endpoint_decoder *cxled);
+struct cxl_dax_region *to_cxl_dax_region(struct device *dev);
#else
static inline bool is_cxl_pmem_region(struct device *dev)
{
@@ -692,6 +775,15 @@ static inline struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev)
{
return NULL;
}
+static inline int cxl_add_to_region(struct cxl_port *root,
+ struct cxl_endpoint_decoder *cxled)
+{
+ return 0;
+}
+static inline struct cxl_dax_region *to_cxl_dax_region(struct device *dev)
+{
+ return NULL;
+}
#endif
/*
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 6749f2afb1b7..090acebba4fa 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -4,6 +4,7 @@
#define __CXL_MEM_H__
#include <uapi/linux/cxl_mem.h>
#include <linux/cdev.h>
+#include <linux/uuid.h>
#include "cxl.h"
/* CXL 2.0 8.2.8.5.1.1 Memory Device Status Register */
@@ -38,6 +39,7 @@
* @cxl_nvb: coordinate removal of @cxl_nvd if present
* @cxl_nvd: optional bridge to an nvdimm if the device supports pmem
* @id: id number of this memdev instance.
+ * @depth: endpoint port depth
*/
struct cxl_memdev {
struct device dev;
@@ -47,6 +49,7 @@ struct cxl_memdev {
struct cxl_nvdimm_bridge *cxl_nvb;
struct cxl_nvdimm *cxl_nvd;
int id;
+ int depth;
};
static inline struct cxl_memdev *to_cxl_memdev(struct device *dev)
@@ -79,6 +82,9 @@ static inline bool is_cxl_endpoint(struct cxl_port *port)
}
struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds);
+int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
+ resource_size_t base, resource_size_t len,
+ resource_size_t skipped);
static inline struct cxl_ep *cxl_ep_load(struct cxl_port *port,
struct cxl_memdev *cxlmd)
@@ -182,15 +188,31 @@ static inline int cxl_mbox_cmd_rc2errno(struct cxl_mbox_cmd *mbox_cmd)
#define CXL_CAPACITY_MULTIPLIER SZ_256M
/**
- * struct cxl_endpoint_dvsec_info - Cached DVSEC info
- * @mem_enabled: cached value of mem_enabled in the DVSEC, PCIE_DEVICE
- * @ranges: Number of active HDM ranges this device uses.
- * @dvsec_range: cached attributes of the ranges in the DVSEC, PCIE_DEVICE
+ * Event Interrupt Policy
+ *
+ * CXL rev 3.0 section 8.2.9.2.4; Table 8-52
+ */
+enum cxl_event_int_mode {
+ CXL_INT_NONE = 0x00,
+ CXL_INT_MSI_MSIX = 0x01,
+ CXL_INT_FW = 0x02
+};
+struct cxl_event_interrupt_policy {
+ u8 info_settings;
+ u8 warn_settings;
+ u8 failure_settings;
+ u8 fatal_settings;
+} __packed;
+
+/**
+ * struct cxl_event_state - Event log driver state
+ *
+ * @event_buf: Buffer to receive event data
+ * @event_log_lock: Serialize event_buf and log use
*/
-struct cxl_endpoint_dvsec_info {
- bool mem_enabled;
- int ranges;
- struct range dvsec_range[2];
+struct cxl_event_state {
+ struct cxl_get_event_payload *buf;
+ struct mutex log_lock;
};
/**
@@ -228,6 +250,7 @@ struct cxl_endpoint_dvsec_info {
* @info: Cached DVSEC information about the device.
* @serial: PCIe Device Serial Number
* @doe_mbs: PCI DOE mailbox array
+ * @event: event log driver state
* @mbox_send: @dev specific transport for transmitting mailbox commands
*
* See section 8.2.9.5.2 Capacity Configuration and Label Storage for
@@ -266,14 +289,21 @@ struct cxl_dev_state {
struct xarray doe_mbs;
+ struct cxl_event_state event;
+
int (*mbox_send)(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd);
};
enum cxl_opcode {
CXL_MBOX_OP_INVALID = 0x0000,
CXL_MBOX_OP_RAW = CXL_MBOX_OP_INVALID,
+ CXL_MBOX_OP_GET_EVENT_RECORD = 0x0100,
+ CXL_MBOX_OP_CLEAR_EVENT_RECORD = 0x0101,
+ CXL_MBOX_OP_GET_EVT_INT_POLICY = 0x0102,
+ CXL_MBOX_OP_SET_EVT_INT_POLICY = 0x0103,
CXL_MBOX_OP_GET_FW_INFO = 0x0200,
CXL_MBOX_OP_ACTIVATE_FW = 0x0202,
+ CXL_MBOX_OP_SET_TIMESTAMP = 0x0301,
CXL_MBOX_OP_GET_SUPPORTED_LOGS = 0x0400,
CXL_MBOX_OP_GET_LOG = 0x0401,
CXL_MBOX_OP_IDENTIFY = 0x4000,
@@ -347,6 +377,136 @@ struct cxl_mbox_identify {
u8 qos_telemetry_caps;
} __packed;
+/*
+ * Common Event Record Format
+ * CXL rev 3.0 section 8.2.9.2.1; Table 8-42
+ */
+struct cxl_event_record_hdr {
+ uuid_t id;
+ u8 length;
+ u8 flags[3];
+ __le16 handle;
+ __le16 related_handle;
+ __le64 timestamp;
+ u8 maint_op_class;
+ u8 reserved[15];
+} __packed;
+
+#define CXL_EVENT_RECORD_DATA_LENGTH 0x50
+struct cxl_event_record_raw {
+ struct cxl_event_record_hdr hdr;
+ u8 data[CXL_EVENT_RECORD_DATA_LENGTH];
+} __packed;
+
+/*
+ * Get Event Records output payload
+ * CXL rev 3.0 section 8.2.9.2.2; Table 8-50
+ */
+#define CXL_GET_EVENT_FLAG_OVERFLOW BIT(0)
+#define CXL_GET_EVENT_FLAG_MORE_RECORDS BIT(1)
+struct cxl_get_event_payload {
+ u8 flags;
+ u8 reserved1;
+ __le16 overflow_err_count;
+ __le64 first_overflow_timestamp;
+ __le64 last_overflow_timestamp;
+ __le16 record_count;
+ u8 reserved2[10];
+ struct cxl_event_record_raw records[];
+} __packed;
+
+/*
+ * CXL rev 3.0 section 8.2.9.2.2; Table 8-49
+ */
+enum cxl_event_log_type {
+ CXL_EVENT_TYPE_INFO = 0x00,
+ CXL_EVENT_TYPE_WARN,
+ CXL_EVENT_TYPE_FAIL,
+ CXL_EVENT_TYPE_FATAL,
+ CXL_EVENT_TYPE_MAX
+};
+
+/*
+ * Clear Event Records input payload
+ * CXL rev 3.0 section 8.2.9.2.3; Table 8-51
+ */
+struct cxl_mbox_clear_event_payload {
+ u8 event_log; /* enum cxl_event_log_type */
+ u8 clear_flags;
+ u8 nr_recs;
+ u8 reserved[3];
+ __le16 handles[];
+} __packed;
+#define CXL_CLEAR_EVENT_MAX_HANDLES U8_MAX
+
+/*
+ * General Media Event Record
+ * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
+ */
+#define CXL_EVENT_GEN_MED_COMP_ID_SIZE 0x10
+struct cxl_event_gen_media {
+ struct cxl_event_record_hdr hdr;
+ __le64 phys_addr;
+ u8 descriptor;
+ u8 type;
+ u8 transaction_type;
+ u8 validity_flags[2];
+ u8 channel;
+ u8 rank;
+ u8 device[3];
+ u8 component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE];
+ u8 reserved[46];
+} __packed;
+
+/*
+ * DRAM Event Record - DER
+ * CXL rev 3.0 section 8.2.9.2.1.2; Table 3-44
+ */
+#define CXL_EVENT_DER_CORRECTION_MASK_SIZE 0x20
+struct cxl_event_dram {
+ struct cxl_event_record_hdr hdr;
+ __le64 phys_addr;
+ u8 descriptor;
+ u8 type;
+ u8 transaction_type;
+ u8 validity_flags[2];
+ u8 channel;
+ u8 rank;
+ u8 nibble_mask[3];
+ u8 bank_group;
+ u8 bank;
+ u8 row[3];
+ u8 column[2];
+ u8 correction_mask[CXL_EVENT_DER_CORRECTION_MASK_SIZE];
+ u8 reserved[0x17];
+} __packed;
+
+/*
+ * Get Health Info Record
+ * CXL rev 3.0 section 8.2.9.8.3.1; Table 8-100
+ */
+struct cxl_get_health_info {
+ u8 health_status;
+ u8 media_status;
+ u8 add_status;
+ u8 life_used;
+ u8 device_temp[2];
+ u8 dirty_shutdown_cnt[4];
+ u8 cor_vol_err_cnt[4];
+ u8 cor_per_err_cnt[4];
+} __packed;
+
+/*
+ * Memory Module Event Record
+ * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45
+ */
+struct cxl_event_mem_module {
+ struct cxl_event_record_hdr hdr;
+ u8 event_type;
+ struct cxl_get_health_info info;
+ u8 reserved[0x3d];
+} __packed;
+
struct cxl_mbox_get_partition_info {
__le64 active_volatile_cap;
__le64 active_persistent_cap;
@@ -372,6 +532,12 @@ struct cxl_mbox_set_partition_info {
#define CXL_SET_PARTITION_IMMEDIATE_FLAG BIT(0)
+/* Set Timestamp CXL 3.0 Spec 8.2.9.4.2 */
+struct cxl_mbox_set_timestamp_in {
+ __le64 timestamp;
+
+} __packed;
+
/**
* struct cxl_mem_command - Driver representation of a memory device command
* @info: Command information as it exists for the UAPI
@@ -393,7 +559,6 @@ struct cxl_mem_command {
struct cxl_command_info info;
enum cxl_opcode opcode;
u32 flags;
-#define CXL_CMD_FLAG_NONE 0
#define CXL_CMD_FLAG_FORCE_ENABLE BIT(0)
};
@@ -441,6 +606,9 @@ int cxl_mem_create_range_info(struct cxl_dev_state *cxlds);
struct cxl_dev_state *cxl_dev_state_create(struct device *dev);
void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds);
void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds);
+void cxl_mem_get_event_records(struct cxl_dev_state *cxlds, u32 status);
+int cxl_set_timestamp(struct cxl_dev_state *cxlds);
+
#ifdef CONFIG_CXL_SUSPEND
void cxl_mem_active_inc(void);
void cxl_mem_active_dec(void);
diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
index 920909791bb9..be6a2ef3cce3 100644
--- a/drivers/cxl/cxlpci.h
+++ b/drivers/cxl/cxlpci.h
@@ -53,6 +53,12 @@
#define CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK GENMASK(15, 8)
#define CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK GENMASK(31, 16)
+/*
+ * NOTE: Currently all the functions which are enabled for CXL require their
+ * vectors to be in the first 16. Use this as the default max.
+ */
+#define CXL_PCI_DEFAULT_MAX_VECTORS 16
+
/* Register Block Identifier (RBI) */
enum cxl_regloc_type {
CXL_REGLOC_RBI_EMPTY = 0,
@@ -64,6 +70,10 @@ enum cxl_regloc_type {
int devm_cxl_port_enumerate_dports(struct cxl_port *port);
struct cxl_dev_state;
-int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm);
+int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm,
+ struct cxl_endpoint_dvsec_info *info);
void read_cdat_data(struct cxl_port *port);
+void cxl_cor_error_detected(struct pci_dev *pdev);
+pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
+ pci_channel_state_t state);
#endif /* __CXL_PCI_H__ */
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 258004f34281..60b23624d167 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -14,8 +14,6 @@
#include "cxlmem.h"
#include "cxlpci.h"
#include "cxl.h"
-#define CREATE_TRACE_POINTS
-#include <trace/events/cxl.h>
/**
* DOC: cxl pci
@@ -162,7 +160,7 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds,
writeq(cmd_reg, cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);
/* #4 */
- dev_dbg(dev, "Sending command\n");
+ dev_dbg(dev, "Sending command: 0x%04x\n", mbox_cmd->opcode);
writel(CXLDEV_MBOX_CTRL_DOORBELL,
cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);
@@ -414,13 +412,295 @@ static bool is_cxl_restricted(struct pci_dev *pdev)
return pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END;
}
-static void disable_aer(void *pdev)
+/*
+ * CXL v3.0 6.2.3 Table 6-4
+ * The table indicates that if PCIe Flit Mode is set, then CXL is in 256B flits
+ * mode, otherwise it's 68B flits mode.
+ */
+static bool cxl_pci_flit_256(struct pci_dev *pdev)
+{
+ u16 lnksta2;
+
+ pcie_capability_read_word(pdev, PCI_EXP_LNKSTA2, &lnksta2);
+ return lnksta2 & PCI_EXP_LNKSTA2_FLIT;
+}
+
+static int cxl_pci_ras_unmask(struct pci_dev *pdev)
+{
+ struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus);
+ struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
+ void __iomem *addr;
+ u32 orig_val, val, mask;
+ u16 cap;
+ int rc;
+
+ if (!cxlds->regs.ras) {
+ dev_dbg(&pdev->dev, "No RAS registers.\n");
+ return 0;
+ }
+
+ /* BIOS has CXL error control */
+ if (!host_bridge->native_cxl_error)
+ return -ENXIO;
+
+ rc = pcie_capability_read_word(pdev, PCI_EXP_DEVCTL, &cap);
+ if (rc)
+ return rc;
+
+ if (cap & PCI_EXP_DEVCTL_URRE) {
+ addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_MASK_OFFSET;
+ orig_val = readl(addr);
+
+ mask = CXL_RAS_UNCORRECTABLE_MASK_MASK;
+ if (!cxl_pci_flit_256(pdev))
+ mask &= ~CXL_RAS_UNCORRECTABLE_MASK_F256B_MASK;
+ val = orig_val & ~mask;
+ writel(val, addr);
+ dev_dbg(&pdev->dev,
+ "Uncorrectable RAS Errors Mask: %#x -> %#x\n",
+ orig_val, val);
+ }
+
+ if (cap & PCI_EXP_DEVCTL_CERE) {
+ addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_MASK_OFFSET;
+ orig_val = readl(addr);
+ val = orig_val & ~CXL_RAS_CORRECTABLE_MASK_MASK;
+ writel(val, addr);
+ dev_dbg(&pdev->dev, "Correctable RAS Errors Mask: %#x -> %#x\n",
+ orig_val, val);
+ }
+
+ return 0;
+}
+
+static void free_event_buf(void *buf)
+{
+ kvfree(buf);
+}
+
+/*
+ * There is a single buffer for reading event logs from the mailbox. All logs
+ * share this buffer protected by the cxlds->event_log_lock.
+ */
+static int cxl_mem_alloc_event_buf(struct cxl_dev_state *cxlds)
+{
+ struct cxl_get_event_payload *buf;
+
+ buf = kvmalloc(cxlds->payload_size, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ cxlds->event.buf = buf;
+
+ return devm_add_action_or_reset(cxlds->dev, free_event_buf, buf);
+}
+
+static int cxl_alloc_irq_vectors(struct pci_dev *pdev)
+{
+ int nvecs;
+
+ /*
+ * Per CXL 3.0 3.1.1 CXL.io Endpoint a function on a CXL device must
+ * not generate INTx messages if that function participates in
+ * CXL.cache or CXL.mem.
+ *
+ * Additionally pci_alloc_irq_vectors() handles calling
+ * pci_free_irq_vectors() automatically despite not being called
+ * pcim_*. See pci_setup_msi_context().
+ */
+ nvecs = pci_alloc_irq_vectors(pdev, 1, CXL_PCI_DEFAULT_MAX_VECTORS,
+ PCI_IRQ_MSIX | PCI_IRQ_MSI);
+ if (nvecs < 1) {
+ dev_dbg(&pdev->dev, "Failed to alloc irq vectors: %d\n", nvecs);
+ return -ENXIO;
+ }
+ return 0;
+}
+
+struct cxl_dev_id {
+ struct cxl_dev_state *cxlds;
+};
+
+static irqreturn_t cxl_event_thread(int irq, void *id)
+{
+ struct cxl_dev_id *dev_id = id;
+ struct cxl_dev_state *cxlds = dev_id->cxlds;
+ u32 status;
+
+ do {
+ /*
+ * CXL 3.0 8.2.8.3.1: The lower 32 bits are the status;
+ * ignore the reserved upper 32 bits
+ */
+ status = readl(cxlds->regs.status + CXLDEV_DEV_EVENT_STATUS_OFFSET);
+ /* Ignore logs unknown to the driver */
+ status &= CXLDEV_EVENT_STATUS_ALL;
+ if (!status)
+ break;
+ cxl_mem_get_event_records(cxlds, status);
+ cond_resched();
+ } while (status);
+
+ return IRQ_HANDLED;
+}
+
+static int cxl_event_req_irq(struct cxl_dev_state *cxlds, u8 setting)
+{
+ struct device *dev = cxlds->dev;
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct cxl_dev_id *dev_id;
+ int irq;
+
+ if (FIELD_GET(CXLDEV_EVENT_INT_MODE_MASK, setting) != CXL_INT_MSI_MSIX)
+ return -ENXIO;
+
+ /* dev_id must be globally unique and must contain the cxlds */
+ dev_id = devm_kzalloc(dev, sizeof(*dev_id), GFP_KERNEL);
+ if (!dev_id)
+ return -ENOMEM;
+ dev_id->cxlds = cxlds;
+
+ irq = pci_irq_vector(pdev,
+ FIELD_GET(CXLDEV_EVENT_INT_MSGNUM_MASK, setting));
+ if (irq < 0)
+ return irq;
+
+ return devm_request_threaded_irq(dev, irq, NULL, cxl_event_thread,
+ IRQF_SHARED | IRQF_ONESHOT, NULL,
+ dev_id);
+}
+
+static int cxl_event_get_int_policy(struct cxl_dev_state *cxlds,
+ struct cxl_event_interrupt_policy *policy)
+{
+ struct cxl_mbox_cmd mbox_cmd = {
+ .opcode = CXL_MBOX_OP_GET_EVT_INT_POLICY,
+ .payload_out = policy,
+ .size_out = sizeof(*policy),
+ };
+ int rc;
+
+ rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+ if (rc < 0)
+ dev_err(cxlds->dev, "Failed to get event interrupt policy : %d",
+ rc);
+
+ return rc;
+}
+
+static int cxl_event_config_msgnums(struct cxl_dev_state *cxlds,
+ struct cxl_event_interrupt_policy *policy)
+{
+ struct cxl_mbox_cmd mbox_cmd;
+ int rc;
+
+ *policy = (struct cxl_event_interrupt_policy) {
+ .info_settings = CXL_INT_MSI_MSIX,
+ .warn_settings = CXL_INT_MSI_MSIX,
+ .failure_settings = CXL_INT_MSI_MSIX,
+ .fatal_settings = CXL_INT_MSI_MSIX,
+ };
+
+ mbox_cmd = (struct cxl_mbox_cmd) {
+ .opcode = CXL_MBOX_OP_SET_EVT_INT_POLICY,
+ .payload_in = policy,
+ .size_in = sizeof(*policy),
+ };
+
+ rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
+ if (rc < 0) {
+ dev_err(cxlds->dev, "Failed to set event interrupt policy : %d",
+ rc);
+ return rc;
+ }
+
+ /* Retrieve final interrupt settings */
+ return cxl_event_get_int_policy(cxlds, policy);
+}
+
+static int cxl_event_irqsetup(struct cxl_dev_state *cxlds)
+{
+ struct cxl_event_interrupt_policy policy;
+ int rc;
+
+ rc = cxl_event_config_msgnums(cxlds, &policy);
+ if (rc)
+ return rc;
+
+ rc = cxl_event_req_irq(cxlds, policy.info_settings);
+ if (rc) {
+ dev_err(cxlds->dev, "Failed to get interrupt for event Info log\n");
+ return rc;
+ }
+
+ rc = cxl_event_req_irq(cxlds, policy.warn_settings);
+ if (rc) {
+ dev_err(cxlds->dev, "Failed to get interrupt for event Warn log\n");
+ return rc;
+ }
+
+ rc = cxl_event_req_irq(cxlds, policy.failure_settings);
+ if (rc) {
+ dev_err(cxlds->dev, "Failed to get interrupt for event Failure log\n");
+ return rc;
+ }
+
+ rc = cxl_event_req_irq(cxlds, policy.fatal_settings);
+ if (rc) {
+ dev_err(cxlds->dev, "Failed to get interrupt for event Fatal log\n");
+ return rc;
+ }
+
+ return 0;
+}
+
+static bool cxl_event_int_is_fw(u8 setting)
+{
+ u8 mode = FIELD_GET(CXLDEV_EVENT_INT_MODE_MASK, setting);
+
+ return mode == CXL_INT_FW;
+}
+
+static int cxl_event_config(struct pci_host_bridge *host_bridge,
+ struct cxl_dev_state *cxlds)
{
- pci_disable_pcie_error_reporting(pdev);
+ struct cxl_event_interrupt_policy policy;
+ int rc;
+
+ /*
+ * When BIOS maintains CXL error reporting control, it will process
+ * event records. Only one agent can do so.
+ */
+ if (!host_bridge->native_cxl_error)
+ return 0;
+
+ rc = cxl_mem_alloc_event_buf(cxlds);
+ if (rc)
+ return rc;
+
+ rc = cxl_event_get_int_policy(cxlds, &policy);
+ if (rc)
+ return rc;
+
+ if (cxl_event_int_is_fw(policy.info_settings) ||
+ cxl_event_int_is_fw(policy.warn_settings) ||
+ cxl_event_int_is_fw(policy.failure_settings) ||
+ cxl_event_int_is_fw(policy.fatal_settings)) {
+ dev_err(cxlds->dev, "FW still in control of Event Logs despite _OSC settings\n");
+ return -EBUSY;
+ }
+
+ rc = cxl_event_irqsetup(cxlds);
+ if (rc)
+ return rc;
+
+ cxl_mem_get_event_records(cxlds, CXLDEV_EVENT_STATUS_ALL);
+
+ return 0;
}
static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
+ struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus);
struct cxl_register_map map;
struct cxl_memdev *cxlmd;
struct cxl_dev_state *cxlds;
@@ -436,6 +716,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
rc = pcim_enable_device(pdev);
if (rc)
return rc;
+ pci_set_master(pdev);
cxlds = cxl_dev_state_create(&pdev->dev);
if (IS_ERR(cxlds))
@@ -484,6 +765,10 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (rc)
return rc;
+ rc = cxl_set_timestamp(cxlds);
+ if (rc)
+ return rc;
+
rc = cxl_dev_state_identify(cxlds);
if (rc)
return rc;
@@ -492,16 +777,22 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (rc)
return rc;
+ rc = cxl_alloc_irq_vectors(pdev);
+ if (rc)
+ return rc;
+
cxlmd = devm_cxl_add_memdev(cxlds);
if (IS_ERR(cxlmd))
return PTR_ERR(cxlmd);
- if (cxlds->regs.ras) {
- pci_enable_pcie_error_reporting(pdev);
- rc = devm_add_action_or_reset(&pdev->dev, disable_aer, pdev);
- if (rc)
- return rc;
- }
+ rc = cxl_event_config(host_bridge, cxlds);
+ if (rc)
+ return rc;
+
+ rc = cxl_pci_ras_unmask(pdev);
+ if (rc)
+ dev_dbg(&pdev->dev, "No RAS reporting unmasked\n");
+
pci_save_state(pdev);
return rc;
@@ -514,99 +805,6 @@ static const struct pci_device_id cxl_mem_pci_tbl[] = {
};
MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl);
-/* CXL spec rev3.0 8.2.4.16.1 */
-static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log)
-{
- void __iomem *addr;
- u32 *log_addr;
- int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32);
-
- addr = cxlds->regs.ras + CXL_RAS_HEADER_LOG_OFFSET;
- log_addr = log;
-
- for (i = 0; i < log_u32_size; i++) {
- *log_addr = readl(addr);
- log_addr++;
- addr += sizeof(u32);
- }
-}
-
-/*
- * Log the state of the RAS status registers and prepare them to log the
- * next error status. Return 1 if reset needed.
- */
-static bool cxl_report_and_clear(struct cxl_dev_state *cxlds)
-{
- struct cxl_memdev *cxlmd = cxlds->cxlmd;
- struct device *dev = &cxlmd->dev;
- u32 hl[CXL_HEADERLOG_SIZE_U32];
- void __iomem *addr;
- u32 status;
- u32 fe;
-
- if (!cxlds->regs.ras)
- return false;
-
- addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
- status = readl(addr);
- if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK))
- return false;
-
- /* If multiple errors, log header points to first error from ctrl reg */
- if (hweight32(status) > 1) {
- void __iomem *rcc_addr =
- cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET;
-
- fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK,
- readl(rcc_addr)));
- } else {
- fe = status;
- }
-
- header_log_copy(cxlds, hl);
- trace_cxl_aer_uncorrectable_error(dev, status, fe, hl);
- writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr);
-
- return true;
-}
-
-static pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
- pci_channel_state_t state)
-{
- struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
- struct cxl_memdev *cxlmd = cxlds->cxlmd;
- struct device *dev = &cxlmd->dev;
- bool ue;
-
- /*
- * A frozen channel indicates an impending reset which is fatal to
- * CXL.mem operation, and will likely crash the system. On the off
- * chance the situation is recoverable dump the status of the RAS
- * capability registers and bounce the active state of the memdev.
- */
- ue = cxl_report_and_clear(cxlds);
-
- switch (state) {
- case pci_channel_io_normal:
- if (ue) {
- device_release_driver(dev);
- return PCI_ERS_RESULT_NEED_RESET;
- }
- return PCI_ERS_RESULT_CAN_RECOVER;
- case pci_channel_io_frozen:
- dev_warn(&pdev->dev,
- "%s: frozen state error detected, disable CXL.mem\n",
- dev_name(dev));
- device_release_driver(dev);
- return PCI_ERS_RESULT_NEED_RESET;
- case pci_channel_io_perm_failure:
- dev_warn(&pdev->dev,
- "failure state error detected, request disconnect\n");
- return PCI_ERS_RESULT_DISCONNECT;
- }
- return PCI_ERS_RESULT_NEED_RESET;
-}
-
static pci_ers_result_t cxl_slot_reset(struct pci_dev *pdev)
{
struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
@@ -631,25 +829,6 @@ static void cxl_error_resume(struct pci_dev *pdev)
dev->driver ? "successful" : "failed");
}
-static void cxl_cor_error_detected(struct pci_dev *pdev)
-{
- struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
- struct cxl_memdev *cxlmd = cxlds->cxlmd;
- struct device *dev = &cxlmd->dev;
- void __iomem *addr;
- u32 status;
-
- if (!cxlds->regs.ras)
- return;
-
- addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
- status = readl(addr);
- if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
- writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
- trace_cxl_aer_correctable_error(dev, status);
- }
-}
-
static const struct pci_error_handlers cxl_error_handlers = {
.error_detected = cxl_error_detected,
.slot_reset = cxl_slot_reset,
diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c
index 08bbbac9a6d0..71cfa1fdf902 100644
--- a/drivers/cxl/pmem.c
+++ b/drivers/cxl/pmem.c
@@ -76,6 +76,7 @@ static int cxl_nvdimm_probe(struct device *dev)
return rc;
set_bit(NDD_LABELING, &flags);
+ set_bit(NDD_REGISTER_SYNC, &flags);
set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask);
set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask);
set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask);
diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
index 5453771bf330..1049bb5ea496 100644
--- a/drivers/cxl/port.c
+++ b/drivers/cxl/port.c
@@ -30,57 +30,116 @@ static void schedule_detach(void *cxlmd)
schedule_cxl_memdev_detach(cxlmd);
}
-static int cxl_port_probe(struct device *dev)
+static int discover_region(struct device *dev, void *root)
+{
+ struct cxl_endpoint_decoder *cxled;
+ int rc;
+
+ if (!is_endpoint_decoder(dev))
+ return 0;
+
+ cxled = to_cxl_endpoint_decoder(dev);
+ if ((cxled->cxld.flags & CXL_DECODER_F_ENABLE) == 0)
+ return 0;
+
+ if (cxled->state != CXL_DECODER_STATE_AUTO)
+ return 0;
+
+ /*
+ * Region enumeration is opportunistic, if this add-event fails,
+ * continue to the next endpoint decoder.
+ */
+ rc = cxl_add_to_region(root, cxled);
+ if (rc)
+ dev_dbg(dev, "failed to add to region: %#llx-%#llx\n",
+ cxled->cxld.hpa_range.start, cxled->cxld.hpa_range.end);
+
+ return 0;
+}
+
+static int cxl_switch_port_probe(struct cxl_port *port)
{
- struct cxl_port *port = to_cxl_port(dev);
struct cxl_hdm *cxlhdm;
int rc;
+ rc = devm_cxl_port_enumerate_dports(port);
+ if (rc < 0)
+ return rc;
- if (!is_cxl_endpoint(port)) {
- rc = devm_cxl_port_enumerate_dports(port);
- if (rc < 0)
- return rc;
- if (rc == 1)
- return devm_cxl_add_passthrough_decoder(port);
- }
+ if (rc == 1)
+ return devm_cxl_add_passthrough_decoder(port);
- cxlhdm = devm_cxl_setup_hdm(port);
+ cxlhdm = devm_cxl_setup_hdm(port, NULL);
if (IS_ERR(cxlhdm))
return PTR_ERR(cxlhdm);
- if (is_cxl_endpoint(port)) {
- struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport);
- struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ return devm_cxl_enumerate_decoders(cxlhdm, NULL);
+}
- /* Cache the data early to ensure is_visible() works */
- read_cdat_data(port);
+static int cxl_endpoint_port_probe(struct cxl_port *port)
+{
+ struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport);
+ struct cxl_endpoint_dvsec_info info = { 0 };
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ struct cxl_hdm *cxlhdm;
+ struct cxl_port *root;
+ int rc;
- get_device(&cxlmd->dev);
- rc = devm_add_action_or_reset(dev, schedule_detach, cxlmd);
- if (rc)
- return rc;
+ rc = cxl_dvsec_rr_decode(cxlds->dev, cxlds->cxl_dvsec, &info);
+ if (rc < 0)
+ return rc;
- rc = cxl_hdm_decode_init(cxlds, cxlhdm);
- if (rc)
- return rc;
+ cxlhdm = devm_cxl_setup_hdm(port, &info);
+ if (IS_ERR(cxlhdm))
+ return PTR_ERR(cxlhdm);
- rc = cxl_await_media_ready(cxlds);
- if (rc) {
- dev_err(dev, "Media not active (%d)\n", rc);
- return rc;
- }
- }
+ /* Cache the data early to ensure is_visible() works */
+ read_cdat_data(port);
- rc = devm_cxl_enumerate_decoders(cxlhdm);
+ get_device(&cxlmd->dev);
+ rc = devm_add_action_or_reset(&port->dev, schedule_detach, cxlmd);
+ if (rc)
+ return rc;
+
+ rc = cxl_hdm_decode_init(cxlds, cxlhdm, &info);
+ if (rc)
+ return rc;
+
+ rc = cxl_await_media_ready(cxlds);
if (rc) {
- dev_err(dev, "Couldn't enumerate decoders (%d)\n", rc);
+ dev_err(&port->dev, "Media not active (%d)\n", rc);
return rc;
}
+ rc = devm_cxl_enumerate_decoders(cxlhdm, &info);
+ if (rc)
+ return rc;
+
+ /*
+ * This can't fail in practice as CXL root exit unregisters all
+ * descendant ports and that in turn synchronizes with cxl_port_probe()
+ */
+ root = find_cxl_root(&cxlmd->dev);
+
+ /*
+ * Now that all endpoint decoders are successfully enumerated, try to
+ * assemble regions from committed decoders
+ */
+ device_for_each_child(&port->dev, root, discover_region);
+ put_device(&root->dev);
+
return 0;
}
+static int cxl_port_probe(struct device *dev)
+{
+ struct cxl_port *port = to_cxl_port(dev);
+
+ if (is_cxl_endpoint(port))
+ return cxl_endpoint_port_probe(port);
+ return cxl_switch_port_probe(port);
+}
+
static ssize_t CDAT_read(struct file *filp, struct kobject *kobj,
struct bin_attribute *bin_attr, char *buf,
loff_t offset, size_t count)