diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-02-12 16:33:05 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-02-12 16:33:05 -0800 |
| commit | e812928be2ee1c2744adf20ed04e0ce1e2fc5c13 (patch) | |
| tree | d2685be8adaca1d097adf407b333d913d74c2582 /drivers/cxl | |
| parent | cebcffe666cc82e68842e27852a019ca54072cb7 (diff) | |
| parent | 63fbf275fa9f18f7020fb8acf54fa107e51d0f23 (diff) | |
| download | lwn-e812928be2ee1c2744adf20ed04e0ce1e2fc5c13.tar.gz lwn-e812928be2ee1c2744adf20ed04e0ce1e2fc5c13.zip | |
Merge tag 'cxl-for-7.0' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl
Pull CXL updates from Dave Jiang:
- Introduce cxl_memdev_attach and pave way for soft reserved handling,
type2 accelerator enabling, and LSA 2.0 enabling. All these series
require the endpoint driver to settle before continuing the memdev
driver probe.
- Address CXL port error protocol handling and reporting.
The large patch series was split into three parts. The first two
parts are included here with the final part coming later.
The first part consists of a series of code refactoring to PCI AER
sub-system that addresses CXL and also CXL RAS code to prepare for
port error handling.
The second part refactors the CXL code to move management of
component registers to cxl_port objects to allow all CXL AER errors
to be handled through the cxl_port hierarchy.
- Provide AMD Zen5 platform address translation for CXL using ACPI
PRMT. This includes a conventions document to explain why this is
needed and how it's implemented.
- Misc CXL patches of fixes, cleanups, and updates. Including CXL
address translation for unaligned MOD3 regions.
[ TLA service: CXL is "Compute Express Link" ]
* tag 'cxl-for-7.0' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl: (59 commits)
cxl: Disable HPA/SPA translation handlers for Normalized Addressing
cxl/region: Factor out code into cxl_region_setup_poison()
cxl/atl: Lock decoders that need address translation
cxl: Enable AMD Zen5 address translation using ACPI PRMT
cxl/acpi: Prepare use of EFI runtime services
cxl: Introduce callback for HPA address ranges translation
cxl/region: Use region data to get the root decoder
cxl/region: Add @hpa_range argument to function cxl_calc_interleave_pos()
cxl/region: Separate region parameter setup and region construction
cxl: Simplify cxl_root_ops allocation and handling
cxl/region: Store HPA range in struct cxl_region
cxl/region: Store root decoder in struct cxl_region
cxl/region: Rename misleading variable name @hpa to @hpa_range
Documentation/driver-api/cxl: ACPI PRM Address Translation Support and AMD Zen5 enablement
cxl, doc: Moving conventions in separate files
cxl, doc: Remove isonum.txt inclusion
cxl/port: Unify endpoint and switch port lookup
cxl/port: Move endpoint component register management to cxl_port
cxl/port: Map Port RAS registers
cxl/port: Move dport RAS setup to dport add time
...
Diffstat (limited to 'drivers/cxl')
| -rw-r--r-- | drivers/cxl/Kconfig | 11 | ||||
| -rw-r--r-- | drivers/cxl/acpi.c | 46 | ||||
| -rw-r--r-- | drivers/cxl/core/Makefile | 4 | ||||
| -rw-r--r-- | drivers/cxl/core/atl.c | 211 | ||||
| -rw-r--r-- | drivers/cxl/core/cdat.c | 8 | ||||
| -rw-r--r-- | drivers/cxl/core/core.h | 40 | ||||
| -rw-r--r-- | drivers/cxl/core/edac.c | 64 | ||||
| -rw-r--r-- | drivers/cxl/core/hdm.c | 13 | ||||
| -rw-r--r-- | drivers/cxl/core/memdev.c | 109 | ||||
| -rw-r--r-- | drivers/cxl/core/pci.c | 384 | ||||
| -rw-r--r-- | drivers/cxl/core/pmem.c | 13 | ||||
| -rw-r--r-- | drivers/cxl/core/port.c | 175 | ||||
| -rw-r--r-- | drivers/cxl/core/ras.c | 198 | ||||
| -rw-r--r-- | drivers/cxl/core/ras_rch.c | 121 | ||||
| -rw-r--r-- | drivers/cxl/core/region.c | 460 | ||||
| -rw-r--r-- | drivers/cxl/core/regs.c | 14 | ||||
| -rw-r--r-- | drivers/cxl/cxl.h | 80 | ||||
| -rw-r--r-- | drivers/cxl/cxlmem.h | 21 | ||||
| -rw-r--r-- | drivers/cxl/cxlpci.h | 75 | ||||
| -rw-r--r-- | drivers/cxl/mem.c | 77 | ||||
| -rw-r--r-- | drivers/cxl/pci.c | 67 | ||||
| -rw-r--r-- | drivers/cxl/port.c | 162 |
22 files changed, 1513 insertions, 840 deletions
diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig index 48b7314afdb8..4589bf11d3fe 100644 --- a/drivers/cxl/Kconfig +++ b/drivers/cxl/Kconfig @@ -22,6 +22,7 @@ if CXL_BUS config CXL_PCI tristate "PCI manageability" default CXL_BUS + select CXL_MEM help The CXL specification defines a "CXL memory device" sub-class in the PCI "memory controller" base class of devices. Device's identified by @@ -89,7 +90,6 @@ config CXL_PMEM config CXL_MEM tristate "CXL: Memory Expansion" - depends on CXL_PCI default CXL_BUS help The CXL.mem protocol allows a device to act as a provider of "System @@ -233,4 +233,13 @@ config CXL_MCE def_bool y depends on X86_MCE && MEMORY_FAILURE +config CXL_RAS + def_bool y + depends on ACPI_APEI_GHES && PCIEAER && CXL_BUS + +config CXL_ATL + def_bool y + depends on CXL_REGION + depends on ACPI_PRMT && AMD_NB + endif diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index 49bba2b9a3c4..d78f005bd994 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -325,10 +325,6 @@ static int cxl_acpi_qos_class(struct cxl_root *cxl_root, return cxl_acpi_evaluate_qtg_dsm(handle, coord, entries, qos_class); } -static const struct cxl_root_ops acpi_root_ops = { - .qos_class = cxl_acpi_qos_class, -}; - static void del_cxl_resource(struct resource *res) { if (!res) @@ -364,7 +360,7 @@ static int add_or_reset_cxl_resource(struct resource *parent, struct resource *r return rc; } -static int cxl_acpi_set_cache_size(struct cxl_root_decoder *cxlrd) +static void cxl_setup_extended_linear_cache(struct cxl_root_decoder *cxlrd) { struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld; struct range *hpa = &cxld->hpa_range; @@ -374,12 +370,14 @@ static int cxl_acpi_set_cache_size(struct cxl_root_decoder *cxlrd) struct resource res; int nid, rc; + /* Explicitly initialize cache size to 0 at the beginning */ + cxlrd->cache_size = 0; res = DEFINE_RES_MEM(start, size); nid = phys_to_target_node(start); rc = hmat_get_extended_linear_cache_size(&res, nid, &cache_size); if (rc) - return 0; + return; /* * The cache range is expected to be within the CFMWS. @@ -391,31 +389,10 @@ static int cxl_acpi_set_cache_size(struct cxl_root_decoder *cxlrd) dev_warn(&cxld->dev, "Extended Linear Cache size %pa != CXL size %pa. No Support!", &cache_size, &size); - return -ENXIO; + return; } cxlrd->cache_size = cache_size; - - return 0; -} - -static void cxl_setup_extended_linear_cache(struct cxl_root_decoder *cxlrd) -{ - int rc; - - rc = cxl_acpi_set_cache_size(cxlrd); - if (rc) { - /* - * Failing to retrieve extended linear cache region resize does not - * prevent the region from functioning. Only causes cxl list showing - * incorrect region size. - */ - dev_warn(cxlrd->cxlsd.cxld.dev.parent, - "Extended linear cache retrieval failed rc:%d\n", rc); - - /* Ignoring return code */ - cxlrd->cache_size = 0; - } } DEFINE_FREE(put_cxlrd, struct cxl_root_decoder *, @@ -930,11 +907,14 @@ static int cxl_acpi_probe(struct platform_device *pdev) cxl_res->end = -1; cxl_res->flags = IORESOURCE_MEM; - cxl_root = devm_cxl_add_root(host, &acpi_root_ops); + cxl_root = devm_cxl_add_root(host); if (IS_ERR(cxl_root)) return PTR_ERR(cxl_root); + cxl_root->ops.qos_class = cxl_acpi_qos_class; root_port = &cxl_root->port; + cxl_setup_prm_address_translation(cxl_root); + rc = bus_for_each_dev(adev->dev.bus, NULL, root_port, add_host_bridge_dport); if (rc < 0) @@ -1015,8 +995,12 @@ static void __exit cxl_acpi_exit(void) cxl_bus_drain(); } -/* load before dax_hmem sees 'Soft Reserved' CXL ranges */ -subsys_initcall(cxl_acpi_init); +/* + * Load before dax_hmem sees 'Soft Reserved' CXL ranges. Use + * subsys_initcall_sync() since there is an order dependency with + * subsys_initcall(efisubsys_init), which must run first. + */ +subsys_initcall_sync(cxl_acpi_init); /* * Arrange for host-bridge ports to be active synchronous with diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile index 5ad8fef210b5..a639a9499972 100644 --- a/drivers/cxl/core/Makefile +++ b/drivers/cxl/core/Makefile @@ -14,9 +14,11 @@ cxl_core-y += pci.o cxl_core-y += hdm.o cxl_core-y += pmu.o cxl_core-y += cdat.o -cxl_core-y += ras.o cxl_core-$(CONFIG_TRACING) += trace.o cxl_core-$(CONFIG_CXL_REGION) += region.o cxl_core-$(CONFIG_CXL_MCE) += mce.o cxl_core-$(CONFIG_CXL_FEATURES) += features.o cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += edac.o +cxl_core-$(CONFIG_CXL_RAS) += ras.o +cxl_core-$(CONFIG_CXL_RAS) += ras_rch.o +cxl_core-$(CONFIG_CXL_ATL) += atl.o diff --git a/drivers/cxl/core/atl.c b/drivers/cxl/core/atl.c new file mode 100644 index 000000000000..310668786189 --- /dev/null +++ b/drivers/cxl/core/atl.c @@ -0,0 +1,211 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2025 Advanced Micro Devices, Inc. + */ + +#include <linux/prmt.h> +#include <linux/pci.h> +#include <linux/acpi.h> + +#include <cxlmem.h> +#include "core.h" + +/* + * PRM Address Translation - CXL DPA to System Physical Address + * + * Reference: + * + * AMD Family 1Ah Models 00h–0Fh and Models 10h–1Fh + * ACPI v6.5 Porting Guide, Publication # 58088 + */ + +static const guid_t prm_cxl_dpa_spa_guid = + GUID_INIT(0xee41b397, 0x25d4, 0x452c, 0xad, 0x54, 0x48, 0xc6, 0xe3, + 0x48, 0x0b, 0x94); + +struct prm_cxl_dpa_spa_data { + u64 dpa; + u8 reserved; + u8 devfn; + u8 bus; + u8 segment; + u64 *spa; +} __packed; + +static u64 prm_cxl_dpa_spa(struct pci_dev *pci_dev, u64 dpa) +{ + struct prm_cxl_dpa_spa_data data; + u64 spa; + int rc; + + data = (struct prm_cxl_dpa_spa_data) { + .dpa = dpa, + .devfn = pci_dev->devfn, + .bus = pci_dev->bus->number, + .segment = pci_domain_nr(pci_dev->bus), + .spa = &spa, + }; + + rc = acpi_call_prm_handler(prm_cxl_dpa_spa_guid, &data); + if (rc) { + pci_dbg(pci_dev, "failed to get SPA for %#llx: %d\n", dpa, rc); + return ULLONG_MAX; + } + + pci_dbg(pci_dev, "PRM address translation: DPA -> SPA: %#llx -> %#llx\n", dpa, spa); + + return spa; +} + +static int cxl_prm_setup_root(struct cxl_root *cxl_root, void *data) +{ + struct cxl_region_context *ctx = data; + struct cxl_endpoint_decoder *cxled = ctx->cxled; + struct cxl_decoder *cxld = &cxled->cxld; + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct range hpa_range = ctx->hpa_range; + struct pci_dev *pci_dev; + u64 spa_len, len; + u64 addr, base_spa, base; + int ways, gran; + + /* + * When Normalized Addressing is enabled, the endpoint maintains a 1:1 + * mapping between HPA and DPA. If disabled, skip address translation + * and perform only a range check. + */ + if (hpa_range.start != cxled->dpa_res->start) + return 0; + + /* + * Endpoints are programmed passthrough in Normalized Addressing mode. + */ + if (ctx->interleave_ways != 1) { + dev_dbg(&cxld->dev, "unexpected interleaving config: ways: %d granularity: %d\n", + ctx->interleave_ways, ctx->interleave_granularity); + return -ENXIO; + } + + if (!cxlmd || !dev_is_pci(cxlmd->dev.parent)) { + dev_dbg(&cxld->dev, "No endpoint found: %s, range %#llx-%#llx\n", + dev_name(cxld->dev.parent), hpa_range.start, + hpa_range.end); + return -ENXIO; + } + + pci_dev = to_pci_dev(cxlmd->dev.parent); + + /* Translate HPA range to SPA. */ + base = hpa_range.start; + hpa_range.start = prm_cxl_dpa_spa(pci_dev, hpa_range.start); + hpa_range.end = prm_cxl_dpa_spa(pci_dev, hpa_range.end); + base_spa = hpa_range.start; + + if (hpa_range.start == ULLONG_MAX || hpa_range.end == ULLONG_MAX) { + dev_dbg(cxld->dev.parent, + "CXL address translation: Failed to translate HPA range: %#llx-%#llx:%#llx-%#llx(%s)\n", + hpa_range.start, hpa_range.end, ctx->hpa_range.start, + ctx->hpa_range.end, dev_name(&cxld->dev)); + return -ENXIO; + } + + /* + * Since translated addresses include the interleaving offsets, align + * the range to 256 MB. + */ + hpa_range.start = ALIGN_DOWN(hpa_range.start, SZ_256M); + hpa_range.end = ALIGN(hpa_range.end, SZ_256M) - 1; + + len = range_len(&ctx->hpa_range); + spa_len = range_len(&hpa_range); + if (!len || !spa_len || spa_len % len) { + dev_dbg(cxld->dev.parent, + "CXL address translation: HPA range not contiguous: %#llx-%#llx:%#llx-%#llx(%s)\n", + hpa_range.start, hpa_range.end, ctx->hpa_range.start, + ctx->hpa_range.end, dev_name(&cxld->dev)); + return -ENXIO; + } + + ways = spa_len / len; + gran = SZ_256; + + /* + * Determine interleave granularity + * + * Note: The position of the chunk from one interleaving block to the + * next may vary and thus cannot be considered constant. Address offsets + * larger than the interleaving block size cannot be used to calculate + * the granularity. + */ + if (ways > 1) { + while (gran <= SZ_16M) { + addr = prm_cxl_dpa_spa(pci_dev, base + gran); + if (addr != base_spa + gran) + break; + gran <<= 1; + } + } + + if (gran > SZ_16M) { + dev_dbg(cxld->dev.parent, + "CXL address translation: Cannot determine granularity: %#llx-%#llx:%#llx-%#llx(%s)\n", + hpa_range.start, hpa_range.end, ctx->hpa_range.start, + ctx->hpa_range.end, dev_name(&cxld->dev)); + return -ENXIO; + } + + /* + * The current kernel implementation does not support endpoint + * setup with Normalized Addressing. It only translates an + * endpoint's DPA to the SPA range of the host bridge. + * Therefore, the endpoint address range cannot be determined, + * making a non-auto setup impossible. If a decoder requires + * address translation, reprogramming should be disabled and + * the decoder locked. + * + * The BIOS, however, provides all the necessary address + * translation data, which the kernel can use to reconfigure + * endpoint decoders with normalized addresses. Locking the + * decoders in the BIOS would prevent a capable kernel (or + * other operating systems) from shutting down auto-generated + * regions and managing resources dynamically. + * + * Indicate that Normalized Addressing is enabled. + */ + cxld->flags |= CXL_DECODER_F_LOCK; + cxld->flags |= CXL_DECODER_F_NORMALIZED_ADDRESSING; + + ctx->hpa_range = hpa_range; + ctx->interleave_ways = ways; + ctx->interleave_granularity = gran; + + dev_dbg(&cxld->dev, + "address mapping found for %s (hpa -> spa): %#llx+%#llx -> %#llx+%#llx ways:%d granularity:%d\n", + dev_name(cxlmd->dev.parent), base, len, hpa_range.start, + spa_len, ways, gran); + + return 0; +} + +void cxl_setup_prm_address_translation(struct cxl_root *cxl_root) +{ + struct device *host = cxl_root->port.uport_dev; + u64 spa; + struct prm_cxl_dpa_spa_data data = { .spa = &spa }; + int rc; + + /* + * Applies only to PCIe Host Bridges which are children of the CXL Root + * Device (HID=“ACPI0017”). Check this and drop cxl_test instances. + */ + if (!acpi_match_device(host->driver->acpi_match_table, host)) + return; + + /* Check kernel (-EOPNOTSUPP) and firmware support (-ENODEV) */ + rc = acpi_call_prm_handler(prm_cxl_dpa_spa_guid, &data); + if (rc == -EOPNOTSUPP || rc == -ENODEV) + return; + + cxl_root->ops.translation_setup_root = cxl_prm_setup_root; +} +EXPORT_SYMBOL_NS_GPL(cxl_setup_prm_address_translation, "CXL"); diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index 7120b5f2e31f..18f0f2a25113 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -213,7 +213,7 @@ static int cxl_port_perf_data_calculate(struct cxl_port *port, if (!cxl_root) return -ENODEV; - if (!cxl_root->ops || !cxl_root->ops->qos_class) + if (!cxl_root->ops.qos_class) return -EOPNOTSUPP; xa_for_each(dsmas_xa, index, dent) { @@ -221,9 +221,9 @@ static int cxl_port_perf_data_calculate(struct cxl_port *port, cxl_coordinates_combine(dent->coord, dent->cdat_coord, ep_c); dent->entries = 1; - rc = cxl_root->ops->qos_class(cxl_root, - &dent->coord[ACCESS_COORDINATE_CPU], - 1, &qos_class); + rc = cxl_root->ops.qos_class(cxl_root, + &dent->coord[ACCESS_COORDINATE_CPU], + 1, &qos_class); if (rc != 1) continue; diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 1fb66132b777..007b8aff0238 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -19,6 +19,14 @@ enum cxl_detach_mode { }; #ifdef CONFIG_CXL_REGION + +struct cxl_region_context { + struct cxl_endpoint_decoder *cxled; + struct range hpa_range; + int interleave_ways; + int interleave_granularity; +}; + extern struct device_attribute dev_attr_create_pmem_region; extern struct device_attribute dev_attr_create_ram_region; extern struct device_attribute dev_attr_delete_region; @@ -144,8 +152,40 @@ int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c); int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port, struct access_coordinate *c); +static inline struct device *dport_to_host(struct cxl_dport *dport) +{ + struct cxl_port *port = dport->port; + + if (is_cxl_root(port)) + return port->uport_dev; + return &port->dev; +} +#ifdef CONFIG_CXL_RAS int cxl_ras_init(void); void cxl_ras_exit(void); +bool cxl_handle_ras(struct device *dev, void __iomem *ras_base); +void cxl_handle_cor_ras(struct device *dev, void __iomem *ras_base); +void cxl_dport_map_rch_aer(struct cxl_dport *dport); +void cxl_disable_rch_root_ints(struct cxl_dport *dport); +void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds); +void devm_cxl_dport_ras_setup(struct cxl_dport *dport); +#else +static inline int cxl_ras_init(void) +{ + return 0; +} +static inline void cxl_ras_exit(void) { } +static inline bool cxl_handle_ras(struct device *dev, void __iomem *ras_base) +{ + return false; +} +static inline void cxl_handle_cor_ras(struct device *dev, void __iomem *ras_base) { } +static inline void cxl_dport_map_rch_aer(struct cxl_dport *dport) { } +static inline void cxl_disable_rch_root_ints(struct cxl_dport *dport) { } +static inline void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { } +static inline void devm_cxl_dport_ras_setup(struct cxl_dport *dport) { } +#endif /* CONFIG_CXL_RAS */ + int cxl_gpf_port_setup(struct cxl_dport *dport); struct cxl_hdm; diff --git a/drivers/cxl/core/edac.c b/drivers/cxl/core/edac.c index 79994ca9bc9f..81160260e26b 100644 --- a/drivers/cxl/core/edac.c +++ b/drivers/cxl/core/edac.c @@ -1988,6 +1988,40 @@ static int cxl_memdev_soft_ppr_init(struct cxl_memdev *cxlmd, return 0; } +static void err_rec_free(void *_cxlmd) +{ + struct cxl_memdev *cxlmd = _cxlmd; + struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array; + struct cxl_event_gen_media *rec_gen_media; + struct cxl_event_dram *rec_dram; + unsigned long index; + + cxlmd->err_rec_array = NULL; + xa_for_each(&array_rec->rec_dram, index, rec_dram) + kfree(rec_dram); + xa_destroy(&array_rec->rec_dram); + + xa_for_each(&array_rec->rec_gen_media, index, rec_gen_media) + kfree(rec_gen_media); + xa_destroy(&array_rec->rec_gen_media); + kfree(array_rec); +} + +static int devm_cxl_memdev_setup_err_rec(struct cxl_memdev *cxlmd) +{ + struct cxl_mem_err_rec *array_rec = + kzalloc(sizeof(*array_rec), GFP_KERNEL); + + if (!array_rec) + return -ENOMEM; + + xa_init(&array_rec->rec_gen_media); + xa_init(&array_rec->rec_dram); + cxlmd->err_rec_array = array_rec; + + return devm_add_action_or_reset(&cxlmd->dev, err_rec_free, cxlmd); +} + int devm_cxl_memdev_edac_register(struct cxl_memdev *cxlmd) { struct edac_dev_feature ras_features[CXL_NR_EDAC_DEV_FEATURES]; @@ -2038,15 +2072,9 @@ int devm_cxl_memdev_edac_register(struct cxl_memdev *cxlmd) } if (repair_inst) { - struct cxl_mem_err_rec *array_rec = - devm_kzalloc(&cxlmd->dev, sizeof(*array_rec), - GFP_KERNEL); - if (!array_rec) - return -ENOMEM; - - xa_init(&array_rec->rec_gen_media); - xa_init(&array_rec->rec_dram); - cxlmd->err_rec_array = array_rec; + rc = devm_cxl_memdev_setup_err_rec(cxlmd); + if (rc) + return rc; } } @@ -2088,22 +2116,4 @@ int devm_cxl_region_edac_register(struct cxl_region *cxlr) } EXPORT_SYMBOL_NS_GPL(devm_cxl_region_edac_register, "CXL"); -void devm_cxl_memdev_edac_release(struct cxl_memdev *cxlmd) -{ - struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array; - struct cxl_event_gen_media *rec_gen_media; - struct cxl_event_dram *rec_dram; - unsigned long index; - - if (!IS_ENABLED(CONFIG_CXL_EDAC_MEM_REPAIR) || !array_rec) - return; - - xa_for_each(&array_rec->rec_dram, index, rec_dram) - kfree(rec_dram); - xa_destroy(&array_rec->rec_dram); - xa_for_each(&array_rec->rec_gen_media, index, rec_gen_media) - kfree(rec_gen_media); - xa_destroy(&array_rec->rec_gen_media); -} -EXPORT_SYMBOL_NS_GPL(devm_cxl_memdev_edac_release, "CXL"); diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index eb5a3a7640c6..e3f0c39e6812 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -844,14 +844,13 @@ static int cxl_decoder_commit(struct cxl_decoder *cxld) scoped_guard(rwsem_read, &cxl_rwsem.dpa) setup_hw_decoder(cxld, hdm); - port->commit_end++; rc = cxld_await_commit(hdm, cxld->id); if (rc) { dev_dbg(&port->dev, "%s: error %d committing decoder\n", dev_name(&cxld->dev), rc); - cxld->reset(cxld); return rc; } + port->commit_end++; cxld->flags |= CXL_DECODER_F_ENABLE; return 0; @@ -966,7 +965,7 @@ static int cxl_setup_hdm_decoder_from_dvsec( rc = devm_cxl_dpa_reserve(cxled, *dpa_base, len, 0); if (rc) { dev_err(&port->dev, - "decoder%d.%d: Failed to reserve DPA range %#llx - %#llx\n (%d)", + "decoder%d.%d: Failed to reserve DPA range %#llx - %#llx: %d\n", port->id, cxld->id, *dpa_base, *dpa_base + len - 1, rc); return rc; } @@ -1117,7 +1116,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, rc = devm_cxl_dpa_reserve(cxled, *dpa_base + skip, dpa_size, skip); if (rc) { dev_err(&port->dev, - "decoder%d.%d: Failed to reserve DPA range %#llx - %#llx\n (%d)", + "decoder%d.%d: Failed to reserve DPA range %#llx - %#llx: %d\n", port->id, cxld->id, *dpa_base, *dpa_base + dpa_size + skip - 1, rc); return rc; @@ -1219,12 +1218,12 @@ static int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, } /** - * __devm_cxl_switch_port_decoders_setup - allocate and setup switch decoders + * devm_cxl_switch_port_decoders_setup - allocate and setup switch decoders * @port: CXL port context * * Return 0 or -errno on error */ -int __devm_cxl_switch_port_decoders_setup(struct cxl_port *port) +int devm_cxl_switch_port_decoders_setup(struct cxl_port *port) { struct cxl_hdm *cxlhdm; @@ -1248,7 +1247,7 @@ int __devm_cxl_switch_port_decoders_setup(struct cxl_port *port) dev_err(&port->dev, "HDM decoder capability not found\n"); return -ENXIO; } -EXPORT_SYMBOL_NS_GPL(__devm_cxl_switch_port_decoders_setup, "CXL"); +EXPORT_SYMBOL_NS_GPL(devm_cxl_switch_port_decoders_setup, "CXL"); /** * devm_cxl_endpoint_decoders_setup - allocate and setup endpoint decoders diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index e370d733e440..af3d0cc65138 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -27,7 +27,6 @@ static void cxl_memdev_release(struct device *dev) struct cxl_memdev *cxlmd = to_cxl_memdev(dev); ida_free(&cxl_memdev_ida, cxlmd->id); - devm_cxl_memdev_edac_release(cxlmd); kfree(cxlmd); } @@ -642,14 +641,24 @@ static void detach_memdev(struct work_struct *work) struct cxl_memdev *cxlmd; cxlmd = container_of(work, typeof(*cxlmd), detach_work); - device_release_driver(&cxlmd->dev); + + /* + * When the creator of @cxlmd sets ->attach it indicates CXL operation + * is required. In that case, @cxlmd detach escalates to parent device + * detach. + */ + if (cxlmd->attach) + device_release_driver(cxlmd->dev.parent); + else + device_release_driver(&cxlmd->dev); put_device(&cxlmd->dev); } static struct lock_class_key cxl_memdev_key; static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds, - const struct file_operations *fops) + const struct file_operations *fops, + const struct cxl_memdev_attach *attach) { struct cxl_memdev *cxlmd; struct device *dev; @@ -665,6 +674,8 @@ static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds, goto err; cxlmd->id = rc; cxlmd->depth = -1; + cxlmd->attach = attach; + cxlmd->endpoint = ERR_PTR(-ENXIO); dev = &cxlmd->dev; device_initialize(dev); @@ -1051,50 +1062,84 @@ static const struct file_operations cxl_memdev_fops = { .llseek = noop_llseek, }; -struct cxl_memdev *devm_cxl_add_memdev(struct device *host, - struct cxl_dev_state *cxlds) +/* + * Activate ioctl operations, no cxl_memdev_rwsem manipulation needed as this is + * ordered with cdev_add() publishing the device. + */ +static int cxlmd_add(struct cxl_memdev *cxlmd, struct cxl_dev_state *cxlds) +{ + int rc; + + cxlmd->cxlds = cxlds; + cxlds->cxlmd = cxlmd; + + rc = cdev_device_add(&cxlmd->cdev, &cxlmd->dev); + if (rc) { + /* + * The cdev was briefly live, shutdown any ioctl operations that + * saw that state. + */ + cxl_memdev_shutdown(&cxlmd->dev); + return rc; + } + + return 0; +} + +DEFINE_FREE(put_cxlmd, struct cxl_memdev *, + if (!IS_ERR_OR_NULL(_T)) put_device(&_T->dev)) + +static struct cxl_memdev *cxl_memdev_autoremove(struct cxl_memdev *cxlmd) +{ + int rc; + + /* + * If @attach is provided fail if the driver is not attached upon + * return. Note that failure here could be the result of a race to + * teardown the CXL port topology. I.e. cxl_mem_probe() could have + * succeeded and then cxl_mem unbound before the lock is acquired. + */ + guard(device)(&cxlmd->dev); + if (cxlmd->attach && !cxlmd->dev.driver) { + cxl_memdev_unregister(cxlmd); + return ERR_PTR(-ENXIO); + } + + rc = devm_add_action_or_reset(cxlmd->cxlds->dev, cxl_memdev_unregister, + cxlmd); + if (rc) + return ERR_PTR(rc); + + return cxlmd; +} + +/* + * Core helper for devm_cxl_add_memdev() that wants to both create a device and + * assert to the caller that upon return cxl_mem::probe() has been invoked. + */ +struct cxl_memdev *__devm_cxl_add_memdev(struct cxl_dev_state *cxlds, + const struct cxl_memdev_attach *attach) { - struct cxl_memdev *cxlmd; struct device *dev; - struct cdev *cdev; int rc; - cxlmd = cxl_memdev_alloc(cxlds, &cxl_memdev_fops); + struct cxl_memdev *cxlmd __free(put_cxlmd) = + cxl_memdev_alloc(cxlds, &cxl_memdev_fops, attach); if (IS_ERR(cxlmd)) return cxlmd; dev = &cxlmd->dev; rc = dev_set_name(dev, "mem%d", cxlmd->id); if (rc) - goto err; - - /* - * Activate ioctl operations, no cxl_memdev_rwsem manipulation - * needed as this is ordered with cdev_add() publishing the device. - */ - cxlmd->cxlds = cxlds; - cxlds->cxlmd = cxlmd; - - cdev = &cxlmd->cdev; - rc = cdev_device_add(cdev, dev); - if (rc) - goto err; + return ERR_PTR(rc); - rc = devm_add_action_or_reset(host, cxl_memdev_unregister, cxlmd); + rc = cxlmd_add(cxlmd, cxlds); if (rc) return ERR_PTR(rc); - return cxlmd; -err: - /* - * The cdev was briefly live, shutdown any ioctl operations that - * saw that state. - */ - cxl_memdev_shutdown(dev); - put_device(dev); - return ERR_PTR(rc); + return cxl_memdev_autoremove(no_free_ptr(cxlmd)); } -EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, "CXL"); +EXPORT_SYMBOL_FOR_MODULES(__devm_cxl_add_memdev, "cxl_mem"); static void sanitize_teardown_notifier(void *data) { diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 5b023a0178a4..f96ce884a213 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -41,14 +41,14 @@ static int pci_get_port_num(struct pci_dev *pdev) } /** - * __devm_cxl_add_dport_by_dev - allocate a dport by dport device + * devm_cxl_add_dport_by_dev - allocate a dport by dport device * @port: cxl_port that hosts the dport * @dport_dev: 'struct device' of the dport * * Returns the allocated dport on success or ERR_PTR() of -errno on error */ -struct cxl_dport *__devm_cxl_add_dport_by_dev(struct cxl_port *port, - struct device *dport_dev) +struct cxl_dport *devm_cxl_add_dport_by_dev(struct cxl_port *port, + struct device *dport_dev) { struct cxl_register_map map; struct pci_dev *pdev; @@ -69,7 +69,7 @@ struct cxl_dport *__devm_cxl_add_dport_by_dev(struct cxl_port *port, device_lock_assert(&port->dev); return devm_cxl_add_dport(port, dport_dev, port_num, map.resource); } -EXPORT_SYMBOL_NS_GPL(__devm_cxl_add_dport_by_dev, "CXL"); +EXPORT_SYMBOL_NS_GPL(devm_cxl_add_dport_by_dev, "CXL"); static int cxl_dvsec_mem_range_valid(struct cxl_dev_state *cxlds, int id) { @@ -86,12 +86,12 @@ static int cxl_dvsec_mem_range_valid(struct cxl_dev_state *cxlds, int id) i = 1; do { rc = pci_read_config_dword(pdev, - d + CXL_DVSEC_RANGE_SIZE_LOW(id), + d + PCI_DVSEC_CXL_RANGE_SIZE_LOW(id), &temp); if (rc) return rc; - valid = FIELD_GET(CXL_DVSEC_MEM_INFO_VALID, temp); + valid = FIELD_GET(PCI_DVSEC_CXL_MEM_INFO_VALID, temp); if (valid) break; msleep(1000); @@ -121,11 +121,11 @@ static int cxl_dvsec_mem_range_active(struct cxl_dev_state *cxlds, int id) /* Check MEM ACTIVE bit, up to 60s timeout by default */ for (i = media_ready_timeout; i; i--) { rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(id), &temp); + pdev, d + PCI_DVSEC_CXL_RANGE_SIZE_LOW(id), &temp); if (rc) return rc; - active = FIELD_GET(CXL_DVSEC_MEM_ACTIVE, temp); + active = FIELD_GET(PCI_DVSEC_CXL_MEM_ACTIVE, temp); if (active) break; msleep(1000); @@ -154,11 +154,11 @@ int cxl_await_media_ready(struct cxl_dev_state *cxlds) u16 cap; rc = pci_read_config_word(pdev, - d + CXL_DVSEC_CAP_OFFSET, &cap); + d + PCI_DVSEC_CXL_CAP, &cap); if (rc) return rc; - hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap); + hdm_count = FIELD_GET(PCI_DVSEC_CXL_HDM_COUNT, cap); for (i = 0; i < hdm_count; i++) { rc = cxl_dvsec_mem_range_valid(cxlds, i); if (rc) @@ -186,16 +186,16 @@ static int cxl_set_mem_enable(struct cxl_dev_state *cxlds, u16 val) u16 ctrl; int rc; - rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl); + rc = pci_read_config_word(pdev, d + PCI_DVSEC_CXL_CTRL, &ctrl); if (rc < 0) return rc; - if ((ctrl & CXL_DVSEC_MEM_ENABLE) == val) + if ((ctrl & PCI_DVSEC_CXL_MEM_ENABLE) == val) return 1; - ctrl &= ~CXL_DVSEC_MEM_ENABLE; + ctrl &= ~PCI_DVSEC_CXL_MEM_ENABLE; ctrl |= val; - rc = pci_write_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, ctrl); + rc = pci_write_config_word(pdev, d + PCI_DVSEC_CXL_CTRL, ctrl); if (rc < 0) return rc; @@ -211,7 +211,7 @@ static int devm_cxl_enable_mem(struct device *host, struct cxl_dev_state *cxlds) { int rc; - rc = cxl_set_mem_enable(cxlds, CXL_DVSEC_MEM_ENABLE); + rc = cxl_set_mem_enable(cxlds, PCI_DVSEC_CXL_MEM_ENABLE); if (rc < 0) return rc; if (rc > 0) @@ -273,11 +273,11 @@ int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, return -ENXIO; } - rc = pci_read_config_word(pdev, d + CXL_DVSEC_CAP_OFFSET, &cap); + rc = pci_read_config_word(pdev, d + PCI_DVSEC_CXL_CAP, &cap); if (rc) return rc; - if (!(cap & CXL_DVSEC_MEM_CAPABLE)) { + if (!(cap & PCI_DVSEC_CXL_MEM_CAPABLE)) { dev_dbg(dev, "Not MEM Capable\n"); return -ENXIO; } @@ -288,7 +288,7 @@ int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, * driver is for a spec defined class code which must be CXL.mem * capable, there is no point in continuing to enable CXL.mem. */ - hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap); + hdm_count = FIELD_GET(PCI_DVSEC_CXL_HDM_COUNT, cap); if (!hdm_count || hdm_count > 2) return -EINVAL; @@ -297,11 +297,11 @@ int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, * disabled, and they will remain moot after the HDM Decoder * capability is enabled. */ - rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl); + rc = pci_read_config_word(pdev, d + PCI_DVSEC_CXL_CTRL, &ctrl); if (rc) return rc; - info->mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl); + info->mem_enabled = FIELD_GET(PCI_DVSEC_CXL_MEM_ENABLE, ctrl); if (!info->mem_enabled) return 0; @@ -314,35 +314,35 @@ int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, return rc; rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_SIZE_HIGH(i), &temp); + pdev, d + PCI_DVSEC_CXL_RANGE_SIZE_HIGH(i), &temp); if (rc) return rc; size = (u64)temp << 32; rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(i), &temp); + pdev, d + PCI_DVSEC_CXL_RANGE_SIZE_LOW(i), &temp); if (rc) return rc; - size |= temp & CXL_DVSEC_MEM_SIZE_LOW_MASK; + size |= temp & PCI_DVSEC_CXL_MEM_SIZE_LOW; if (!size) { continue; } rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_BASE_HIGH(i), &temp); + pdev, d + PCI_DVSEC_CXL_RANGE_BASE_HIGH(i), &temp); if (rc) return rc; base = (u64)temp << 32; rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_BASE_LOW(i), &temp); + pdev, d + PCI_DVSEC_CXL_RANGE_BASE_LOW(i), &temp); if (rc) return rc; - base |= temp & CXL_DVSEC_MEM_BASE_LOW_MASK; + base |= temp & PCI_DVSEC_CXL_MEM_BASE_LOW; info->dvsec_range[ranges++] = (struct range) { .start = base, @@ -632,324 +632,6 @@ err: } EXPORT_SYMBOL_NS_GPL(read_cdat_data, "CXL"); -static void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds, - void __iomem *ras_base) -{ - void __iomem *addr; - u32 status; - - if (!ras_base) - return; - - addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET; - status = readl(addr); - if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) { - writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); - trace_cxl_aer_correctable_error(cxlds->cxlmd, status); - } -} - -static void cxl_handle_endpoint_cor_ras(struct cxl_dev_state *cxlds) -{ - return __cxl_handle_cor_ras(cxlds, cxlds->regs.ras); -} - -/* CXL spec rev3.0 8.2.4.16.1 */ -static void header_log_copy(void __iomem *ras_base, u32 *log) -{ - void __iomem *addr; - u32 *log_addr; - int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32); - - addr = ras_base + CXL_RAS_HEADER_LOG_OFFSET; - log_addr = log; - - for (i = 0; i < log_u32_size; i++) { - *log_addr = readl(addr); - log_addr++; - addr += sizeof(u32); - } -} - -/* - * Log the state of the RAS status registers and prepare them to log the - * next error status. Return 1 if reset needed. - */ -static bool __cxl_handle_ras(struct cxl_dev_state *cxlds, - void __iomem *ras_base) -{ - u32 hl[CXL_HEADERLOG_SIZE_U32]; - void __iomem *addr; - u32 status; - u32 fe; - - if (!ras_base) - return false; - - addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET; - status = readl(addr); - if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK)) - return false; - - /* If multiple errors, log header points to first error from ctrl reg */ - if (hweight32(status) > 1) { - void __iomem *rcc_addr = - ras_base + CXL_RAS_CAP_CONTROL_OFFSET; - - fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, - readl(rcc_addr))); - } else { - fe = status; - } - - header_log_copy(ras_base, hl); - trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe, hl); - writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr); - - return true; -} - -static bool cxl_handle_endpoint_ras(struct cxl_dev_state *cxlds) -{ - return __cxl_handle_ras(cxlds, cxlds->regs.ras); -} - -#ifdef CONFIG_PCIEAER_CXL - -static void cxl_dport_map_rch_aer(struct cxl_dport *dport) -{ - resource_size_t aer_phys; - struct device *host; - u16 aer_cap; - - aer_cap = cxl_rcrb_to_aer(dport->dport_dev, dport->rcrb.base); - if (aer_cap) { - host = dport->reg_map.host; - aer_phys = aer_cap + dport->rcrb.base; - dport->regs.dport_aer = devm_cxl_iomap_block(host, aer_phys, - sizeof(struct aer_capability_regs)); - } -} - -static void cxl_dport_map_ras(struct cxl_dport *dport) -{ - struct cxl_register_map *map = &dport->reg_map; - struct device *dev = dport->dport_dev; - - if (!map->component_map.ras.valid) - dev_dbg(dev, "RAS registers not found\n"); - else if (cxl_map_component_regs(map, &dport->regs.component, - BIT(CXL_CM_CAP_CAP_ID_RAS))) - dev_dbg(dev, "Failed to map RAS capability.\n"); -} - -static void cxl_disable_rch_root_ints(struct cxl_dport *dport) -{ - void __iomem *aer_base = dport->regs.dport_aer; - u32 aer_cmd_mask, aer_cmd; - - if (!aer_base) - return; - - /* - * Disable RCH root port command interrupts. - * CXL 3.0 12.2.1.1 - RCH Downstream Port-detected Errors - * - * This sequence may not be necessary. CXL spec states disabling - * the root cmd register's interrupts is required. But, PCI spec - * shows these are disabled by default on reset. - */ - aer_cmd_mask = (PCI_ERR_ROOT_CMD_COR_EN | - PCI_ERR_ROOT_CMD_NONFATAL_EN | - PCI_ERR_ROOT_CMD_FATAL_EN); - aer_cmd = readl(aer_base + PCI_ERR_ROOT_COMMAND); - aer_cmd &= ~aer_cmd_mask; - writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND); -} - -/** - * cxl_dport_init_ras_reporting - Setup CXL RAS report on this dport - * @dport: the cxl_dport that needs to be initialized - * @host: host device for devm operations - */ -void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host) -{ - dport->reg_map.host = host; - cxl_dport_map_ras(dport); - - if (dport->rch) { - struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport->dport_dev); - - if (!host_bridge->native_aer) - return; - - cxl_dport_map_rch_aer(dport); - cxl_disable_rch_root_ints(dport); - } -} -EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL"); - -static void cxl_handle_rdport_cor_ras(struct cxl_dev_state *cxlds, - struct cxl_dport *dport) -{ - return __cxl_handle_cor_ras(cxlds, dport->regs.ras); -} - -static bool cxl_handle_rdport_ras(struct cxl_dev_state *cxlds, - struct cxl_dport *dport) -{ - return __cxl_handle_ras(cxlds, dport->regs.ras); -} - -/* - * Copy the AER capability registers using 32 bit read accesses. - * This is necessary because RCRB AER capability is MMIO mapped. Clear the - * status after copying. - * - * @aer_base: base address of AER capability block in RCRB - * @aer_regs: destination for copying AER capability - */ -static bool cxl_rch_get_aer_info(void __iomem *aer_base, - struct aer_capability_regs *aer_regs) -{ - int read_cnt = sizeof(struct aer_capability_regs) / sizeof(u32); - u32 *aer_regs_buf = (u32 *)aer_regs; - int n; - - if (!aer_base) - return false; - - /* Use readl() to guarantee 32-bit accesses */ - for (n = 0; n < read_cnt; n++) - aer_regs_buf[n] = readl(aer_base + n * sizeof(u32)); - - writel(aer_regs->uncor_status, aer_base + PCI_ERR_UNCOR_STATUS); - writel(aer_regs->cor_status, aer_base + PCI_ERR_COR_STATUS); - - return true; -} - -/* Get AER severity. Return false if there is no error. */ -static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs, - int *severity) -{ - if (aer_regs->uncor_status & ~aer_regs->uncor_mask) { - if (aer_regs->uncor_status & PCI_ERR_ROOT_FATAL_RCV) - *severity = AER_FATAL; - else - *severity = AER_NONFATAL; - return true; - } - - if (aer_regs->cor_status & ~aer_regs->cor_mask) { - *severity = AER_CORRECTABLE; - return true; - } - - return false; -} - -static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) -{ - struct pci_dev *pdev = to_pci_dev(cxlds->dev); - struct aer_capability_regs aer_regs; - struct cxl_dport *dport; - int severity; - - struct cxl_port *port __free(put_cxl_port) = - cxl_pci_find_port(pdev, &dport); - if (!port) - return; - - if (!cxl_rch_get_aer_info(dport->regs.dport_aer, &aer_regs)) - return; - - if (!cxl_rch_get_aer_severity(&aer_regs, &severity)) - return; - - pci_print_aer(pdev, severity, &aer_regs); - - if (severity == AER_CORRECTABLE) - cxl_handle_rdport_cor_ras(cxlds, dport); - else - cxl_handle_rdport_ras(cxlds, dport); -} - -#else -static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { } -#endif - -void cxl_cor_error_detected(struct pci_dev *pdev) -{ - struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); - struct device *dev = &cxlds->cxlmd->dev; - - scoped_guard(device, dev) { - if (!dev->driver) { - dev_warn(&pdev->dev, - "%s: memdev disabled, abort error handling\n", - dev_name(dev)); - return; - } - - if (cxlds->rcd) - cxl_handle_rdport_errors(cxlds); - - cxl_handle_endpoint_cor_ras(cxlds); - } -} -EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, "CXL"); - -pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, - pci_channel_state_t state) -{ - struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); - struct cxl_memdev *cxlmd = cxlds->cxlmd; - struct device *dev = &cxlmd->dev; - bool ue; - - scoped_guard(device, dev) { - if (!dev->driver) { - dev_warn(&pdev->dev, - "%s: memdev disabled, abort error handling\n", - dev_name(dev)); - return PCI_ERS_RESULT_DISCONNECT; - } - - if (cxlds->rcd) - cxl_handle_rdport_errors(cxlds); - /* - * A frozen channel indicates an impending reset which is fatal to - * CXL.mem operation, and will likely crash the system. On the off - * chance the situation is recoverable dump the status of the RAS - * capability registers and bounce the active state of the memdev. - */ - ue = cxl_handle_endpoint_ras(cxlds); - } - - - switch (state) { - case pci_channel_io_normal: - if (ue) { - device_release_driver(dev); - return PCI_ERS_RESULT_NEED_RESET; - } - return PCI_ERS_RESULT_CAN_RECOVER; - case pci_channel_io_frozen: - dev_warn(&pdev->dev, - "%s: frozen state error detected, disable CXL.mem\n", - dev_name(dev)); - device_release_driver(dev); - return PCI_ERS_RESULT_NEED_RESET; - case pci_channel_io_perm_failure: - dev_warn(&pdev->dev, - "failure state error detected, request disconnect\n"); - return PCI_ERS_RESULT_DISCONNECT; - } - return PCI_ERS_RESULT_NEED_RESET; -} -EXPORT_SYMBOL_NS_GPL(cxl_error_detected, "CXL"); - static int cxl_flit_size(struct pci_dev *pdev) { if (cxl_pci_flit_256(pdev)) @@ -1068,7 +750,7 @@ u16 cxl_gpf_get_dvsec(struct device *dev) is_port = false; dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL, - is_port ? CXL_DVSEC_PORT_GPF : CXL_DVSEC_DEVICE_GPF); + is_port ? PCI_DVSEC_CXL_PORT_GPF : PCI_DVSEC_CXL_DEVICE_GPF); if (!dvsec) dev_warn(dev, "%s GPF DVSEC not present\n", is_port ? "Port" : "Device"); @@ -1084,14 +766,14 @@ static int update_gpf_port_dvsec(struct pci_dev *pdev, int dvsec, int phase) switch (phase) { case 1: - offset = CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET; - base = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK; - scale = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK; + offset = PCI_DVSEC_CXL_PORT_GPF_PHASE_1_CONTROL; + base = PCI_DVSEC_CXL_PORT_GPF_PHASE_1_TMO_BASE; + scale = PCI_DVSEC_CXL_PORT_GPF_PHASE_1_TMO_SCALE; break; case 2: - offset = CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET; - base = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK; - scale = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK; + offset = PCI_DVSEC_CXL_PORT_GPF_PHASE_2_CONTROL; + base = PCI_DVSEC_CXL_PORT_GPF_PHASE_2_TMO_BASE; + scale = PCI_DVSEC_CXL_PORT_GPF_PHASE_2_TMO_SCALE; break; default: return -EINVAL; diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c index 8853415c106a..e7b1e6fa0ea0 100644 --- a/drivers/cxl/core/pmem.c +++ b/drivers/cxl/core/pmem.c @@ -237,12 +237,13 @@ static void cxlmd_release_nvdimm(void *_cxlmd) /** * devm_cxl_add_nvdimm() - add a bridge between a cxl_memdev and an nvdimm - * @parent_port: parent port for the (to be added) @cxlmd endpoint port - * @cxlmd: cxl_memdev instance that will perform LIBNVDIMM operations + * @host: host device for devm operations + * @port: any port in the CXL topology to find the nvdimm-bridge device + * @cxlmd: parent of the to be created cxl_nvdimm device * * Return: 0 on success negative error code on failure. */ -int devm_cxl_add_nvdimm(struct cxl_port *parent_port, +int devm_cxl_add_nvdimm(struct device *host, struct cxl_port *port, struct cxl_memdev *cxlmd) { struct cxl_nvdimm_bridge *cxl_nvb; @@ -250,7 +251,7 @@ int devm_cxl_add_nvdimm(struct cxl_port *parent_port, struct device *dev; int rc; - cxl_nvb = cxl_find_nvdimm_bridge(parent_port); + cxl_nvb = cxl_find_nvdimm_bridge(port); if (!cxl_nvb) return -ENODEV; @@ -270,10 +271,10 @@ int devm_cxl_add_nvdimm(struct cxl_port *parent_port, if (rc) goto err; - dev_dbg(&cxlmd->dev, "register %s\n", dev_name(dev)); + dev_dbg(host, "register %s\n", dev_name(dev)); /* @cxlmd carries a reference on @cxl_nvb until cxlmd_release_nvdimm */ - return devm_add_action_or_reset(&cxlmd->dev, cxlmd_release_nvdimm, cxlmd); + return devm_add_action_or_reset(host, cxlmd_release_nvdimm, cxlmd); err: put_device(dev); diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 3310dbfae9d6..fea8d5f5f331 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -778,7 +778,7 @@ static int cxl_setup_comp_regs(struct device *host, struct cxl_register_map *map return cxl_setup_regs(map); } -static int cxl_port_setup_regs(struct cxl_port *port, +int cxl_port_setup_regs(struct cxl_port *port, resource_size_t component_reg_phys) { if (dev_is_platform(port->uport_dev)) @@ -786,6 +786,7 @@ static int cxl_port_setup_regs(struct cxl_port *port, return cxl_setup_comp_regs(&port->dev, &port->reg_map, component_reg_phys); } +EXPORT_SYMBOL_NS_GPL(cxl_port_setup_regs, "CXL"); static int cxl_dport_setup_regs(struct device *host, struct cxl_dport *dport, resource_size_t component_reg_phys) @@ -822,16 +823,18 @@ DEFINE_DEBUGFS_ATTRIBUTE(cxl_einj_inject_fops, NULL, cxl_einj_inject, static void cxl_debugfs_create_dport_dir(struct cxl_dport *dport) { + struct cxl_port *parent = parent_port_of(dport->port); struct dentry *dir; if (!einj_cxl_is_initialized()) return; /* - * dport_dev needs to be a PCIe port for CXL 2.0+ ports because - * EINJ expects a dport SBDF to be specified for 2.0 error injection. + * Protocol error injection is only available for CXL 2.0+ root ports + * and CXL 1.1 downstream ports */ - if (!dport->rch && !dev_is_pci(dport->dport_dev)) + if (!dport->rch && + !(dev_is_pci(dport->dport_dev) && parent && is_cxl_root(parent))) return; dir = cxl_debugfs_create_dir(dev_name(dport->dport_dev)); @@ -954,19 +957,15 @@ struct cxl_port *devm_cxl_add_port(struct device *host, } EXPORT_SYMBOL_NS_GPL(devm_cxl_add_port, "CXL"); -struct cxl_root *devm_cxl_add_root(struct device *host, - const struct cxl_root_ops *ops) +struct cxl_root *devm_cxl_add_root(struct device *host) { - struct cxl_root *cxl_root; struct cxl_port *port; port = devm_cxl_add_port(host, host, CXL_RESOURCE_NONE, NULL); if (IS_ERR(port)) return ERR_CAST(port); - cxl_root = to_cxl_root(port); - cxl_root->ops = ops; - return cxl_root; + return to_cxl_root(port); } EXPORT_SYMBOL_NS_GPL(devm_cxl_add_root, "CXL"); @@ -1066,11 +1065,15 @@ static int add_dport(struct cxl_port *port, struct cxl_dport *dport) return -EBUSY; } + /* Arrange for dport_dev to be valid through remove_dport() */ + struct device *dev __free(put_device) = get_device(dport->dport_dev); + rc = xa_insert(&port->dports, (unsigned long)dport->dport_dev, dport, GFP_KERNEL); if (rc) return rc; + retain_and_null_ptr(dev); port->nr_dports++; return 0; } @@ -1099,6 +1102,7 @@ static void cxl_dport_remove(void *data) struct cxl_dport *dport = data; struct cxl_port *port = dport->port; + port->nr_dports--; xa_erase(&port->dports, (unsigned long) dport->dport_dev); put_device(dport->dport_dev); } @@ -1113,6 +1117,48 @@ static void cxl_dport_unlink(void *data) sysfs_remove_link(&port->dev.kobj, link_name); } +static void free_dport(void *dport) +{ + kfree(dport); +} + +/* + * Upon return either a group is established with one action (free_dport()), or + * no group established and @dport is freed. + */ +static void *cxl_dport_open_dr_group_or_free(struct cxl_dport *dport) +{ + int rc; + struct device *host = dport_to_host(dport); + void *group = devres_open_group(host, dport, GFP_KERNEL); + + if (!group) { + kfree(dport); + return NULL; + } + + rc = devm_add_action_or_reset(host, free_dport, dport); + if (rc) { + devres_release_group(host, group); + return NULL; + } + + return group; +} + +static void cxl_dport_close_dr_group(struct cxl_dport *dport, void *group) +{ + devres_close_group(dport_to_host(dport), group); +} + +static void del_dport(struct cxl_dport *dport) +{ + devres_release_group(dport_to_host(dport), dport); +} + +/* The dport group id is the dport */ +DEFINE_FREE(cxl_dport_release_dr_group, void *, if (_T) del_dport(_T)) + static struct cxl_dport * __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev, int port_id, resource_size_t component_reg_phys, @@ -1138,14 +1184,20 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev, CXL_TARGET_STRLEN) return ERR_PTR(-EINVAL); - dport = devm_kzalloc(host, sizeof(*dport), GFP_KERNEL); + dport = kzalloc(sizeof(*dport), GFP_KERNEL); if (!dport) return ERR_PTR(-ENOMEM); + /* Just enough init to manage the devres group */ dport->dport_dev = dport_dev; dport->port_id = port_id; dport->port = port; + void *dport_dr_group __free(cxl_dport_release_dr_group) = + cxl_dport_open_dr_group_or_free(dport); + if (!dport_dr_group) + return ERR_PTR(-ENOMEM); + if (rcrb == CXL_RESOURCE_NONE) { rc = cxl_dport_setup_regs(&port->dev, dport, component_reg_phys); @@ -1181,21 +1233,6 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev, if (rc) return ERR_PTR(rc); - /* - * Setup port register if this is the first dport showed up. Having - * a dport also means that there is at least 1 active link. - */ - if (port->nr_dports == 1 && - port->component_reg_phys != CXL_RESOURCE_NONE) { - rc = cxl_port_setup_regs(port, port->component_reg_phys); - if (rc) { - xa_erase(&port->dports, (unsigned long)dport->dport_dev); - return ERR_PTR(rc); - } - port->component_reg_phys = CXL_RESOURCE_NONE; - } - - get_device(dport_dev); rc = devm_add_action_or_reset(host, cxl_dport_remove, dport); if (rc) return ERR_PTR(rc); @@ -1213,6 +1250,12 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev, cxl_debugfs_create_dport_dir(dport); + if (!dport->rch) + devm_cxl_dport_ras_setup(dport); + + /* keep the group, and mark the end of devm actions */ + cxl_dport_close_dr_group(dport, no_free_ptr(dport_dr_group)); + return dport; } @@ -1439,15 +1482,6 @@ static void delete_switch_port(struct cxl_port *port) devm_release_action(port->dev.parent, unregister_port, port); } -static void del_dport(struct cxl_dport *dport) -{ - struct cxl_port *port = dport->port; - - devm_release_action(&port->dev, cxl_dport_unlink, dport); - devm_release_action(&port->dev, cxl_dport_remove, dport); - devm_kfree(&port->dev, dport); -} - static void del_dports(struct cxl_port *port) { struct cxl_dport *dport; @@ -1554,10 +1588,20 @@ static int match_port_by_uport(struct device *dev, const void *data) return 0; port = to_cxl_port(dev); + /* Endpoint ports are hosted by memdevs */ + if (is_cxl_memdev(port->uport_dev)) + return uport_dev == port->uport_dev->parent; return uport_dev == port->uport_dev; } -/* +/** + * find_cxl_port_by_uport - Find a CXL port device companion + * @uport_dev: Device that acts as a switch or endpoint in the CXL hierarchy + * + * In the case of endpoint ports recall that port->uport_dev points to a 'struct + * cxl_memdev' device. So, the @uport_dev argument is the parent device of the + * 'struct cxl_memdev' in that case. + * * Function takes a device reference on the port device. Caller should do a * put_device() when done. */ @@ -1597,47 +1641,44 @@ static int update_decoder_targets(struct device *dev, void *data) return 0; } -DEFINE_FREE(del_cxl_dport, struct cxl_dport *, if (!IS_ERR_OR_NULL(_T)) del_dport(_T)) -static struct cxl_dport *cxl_port_add_dport(struct cxl_port *port, - struct device *dport_dev) +void cxl_port_update_decoder_targets(struct cxl_port *port, + struct cxl_dport *dport) { - struct cxl_dport *dport; - int rc; + device_for_each_child(&port->dev, dport, update_decoder_targets); +} +EXPORT_SYMBOL_NS_GPL(cxl_port_update_decoder_targets, "CXL"); - device_lock_assert(&port->dev); - if (!port->dev.driver) - return ERR_PTR(-ENXIO); +static bool dport_exists(struct cxl_port *port, struct device *dport_dev) +{ + struct cxl_dport *dport = cxl_find_dport_by_dev(port, dport_dev); - dport = cxl_find_dport_by_dev(port, dport_dev); if (dport) { dev_dbg(&port->dev, "dport%d:%s already exists\n", dport->port_id, dev_name(dport_dev)); - return ERR_PTR(-EBUSY); + return true; } - struct cxl_dport *new_dport __free(del_cxl_dport) = - devm_cxl_add_dport_by_dev(port, dport_dev); - if (IS_ERR(new_dport)) - return new_dport; + return false; +} - cxl_switch_parse_cdat(new_dport); +static struct cxl_dport *probe_dport(struct cxl_port *port, + struct device *dport_dev) +{ + struct cxl_driver *drv; - if (ida_is_empty(&port->decoder_ida)) { - rc = devm_cxl_switch_port_decoders_setup(port); - if (rc) - return ERR_PTR(rc); - dev_dbg(&port->dev, "first dport%d:%s added with decoders\n", - new_dport->port_id, dev_name(dport_dev)); - return no_free_ptr(new_dport); - } + device_lock_assert(&port->dev); + if (!port->dev.driver) + return ERR_PTR(-ENXIO); - /* New dport added, update the decoder targets */ - device_for_each_child(&port->dev, new_dport, update_decoder_targets); + if (dport_exists(port, dport_dev)) + return ERR_PTR(-EBUSY); - dev_dbg(&port->dev, "dport%d:%s added\n", new_dport->port_id, - dev_name(dport_dev)); + drv = container_of(port->dev.driver, struct cxl_driver, drv); + if (!drv->add_dport) + return ERR_PTR(-ENXIO); - return no_free_ptr(new_dport); + /* see cxl_port_add_dport() */ + return drv->add_dport(port, dport_dev); } static struct cxl_dport *devm_cxl_create_port(struct device *ep_dev, @@ -1684,7 +1725,7 @@ static struct cxl_dport *devm_cxl_create_port(struct device *ep_dev, } guard(device)(&port->dev); - return cxl_port_add_dport(port, dport_dev); + return probe_dport(port, dport_dev); } static int add_port_attach_ep(struct cxl_memdev *cxlmd, @@ -1716,7 +1757,7 @@ static int add_port_attach_ep(struct cxl_memdev *cxlmd, scoped_guard(device, &parent_port->dev) { parent_dport = cxl_find_dport_by_dev(parent_port, dparent); if (!parent_dport) { - parent_dport = cxl_port_add_dport(parent_port, dparent); + parent_dport = probe_dport(parent_port, dparent); if (IS_ERR(parent_dport)) return PTR_ERR(parent_dport); } @@ -1752,7 +1793,7 @@ static struct cxl_dport *find_or_add_dport(struct cxl_port *port, device_lock_assert(&port->dev); dport = cxl_find_dport_by_dev(port, dport_dev); if (!dport) { - dport = cxl_port_add_dport(port, dport_dev); + dport = probe_dport(port, dport_dev); if (IS_ERR(dport)) return dport; diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c index a90480d07c87..006c6ffc2f56 100644 --- a/drivers/cxl/core/ras.c +++ b/drivers/cxl/core/ras.c @@ -5,6 +5,7 @@ #include <linux/aer.h> #include <cxl/event.h> #include <cxlmem.h> +#include <cxlpci.h> #include "trace.h" static void cxl_cper_trace_corr_port_prot_err(struct pci_dev *pdev, @@ -125,3 +126,200 @@ void cxl_ras_exit(void) cxl_cper_unregister_prot_err_work(&cxl_cper_prot_err_work); cancel_work_sync(&cxl_cper_prot_err_work); } + +static void cxl_dport_map_ras(struct cxl_dport *dport) +{ + struct cxl_register_map *map = &dport->reg_map; + struct device *dev = dport->dport_dev; + + if (!map->component_map.ras.valid) + dev_dbg(dev, "RAS registers not found\n"); + else if (cxl_map_component_regs(map, &dport->regs.component, + BIT(CXL_CM_CAP_CAP_ID_RAS))) + dev_dbg(dev, "Failed to map RAS capability.\n"); +} + +/** + * devm_cxl_dport_ras_setup - Setup CXL RAS report on this dport + * @dport: the cxl_dport that needs to be initialized + */ +void devm_cxl_dport_ras_setup(struct cxl_dport *dport) +{ + dport->reg_map.host = dport_to_host(dport); + cxl_dport_map_ras(dport); +} + +void devm_cxl_dport_rch_ras_setup(struct cxl_dport *dport) +{ + struct pci_host_bridge *host_bridge; + + if (!dev_is_pci(dport->dport_dev)) + return; + + devm_cxl_dport_ras_setup(dport); + + host_bridge = to_pci_host_bridge(dport->dport_dev); + if (!host_bridge->native_aer) + return; + + cxl_dport_map_rch_aer(dport); + cxl_disable_rch_root_ints(dport); +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_dport_rch_ras_setup, "CXL"); + +void devm_cxl_port_ras_setup(struct cxl_port *port) +{ + struct cxl_register_map *map = &port->reg_map; + + if (!map->component_map.ras.valid) { + dev_dbg(&port->dev, "RAS registers not found\n"); + return; + } + + map->host = &port->dev; + if (cxl_map_component_regs(map, &port->regs, + BIT(CXL_CM_CAP_CAP_ID_RAS))) + dev_dbg(&port->dev, "Failed to map RAS capability\n"); +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_port_ras_setup, "CXL"); + +void cxl_handle_cor_ras(struct device *dev, void __iomem *ras_base) +{ + void __iomem *addr; + u32 status; + + if (!ras_base) + return; + + addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET; + status = readl(addr); + if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) { + writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); + trace_cxl_aer_correctable_error(to_cxl_memdev(dev), status); + } +} + +/* CXL spec rev3.0 8.2.4.16.1 */ +static void header_log_copy(void __iomem *ras_base, u32 *log) +{ + void __iomem *addr; + u32 *log_addr; + int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32); + + addr = ras_base + CXL_RAS_HEADER_LOG_OFFSET; + log_addr = log; + + for (i = 0; i < log_u32_size; i++) { + *log_addr = readl(addr); + log_addr++; + addr += sizeof(u32); + } +} + +/* + * Log the state of the RAS status registers and prepare them to log the + * next error status. Return 1 if reset needed. + */ +bool cxl_handle_ras(struct device *dev, void __iomem *ras_base) +{ + u32 hl[CXL_HEADERLOG_SIZE_U32]; + void __iomem *addr; + u32 status; + u32 fe; + + if (!ras_base) + return false; + + addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET; + status = readl(addr); + if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK)) + return false; + + /* If multiple errors, log header points to first error from ctrl reg */ + if (hweight32(status) > 1) { + void __iomem *rcc_addr = + ras_base + CXL_RAS_CAP_CONTROL_OFFSET; + + fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, + readl(rcc_addr))); + } else { + fe = status; + } + + header_log_copy(ras_base, hl); + trace_cxl_aer_uncorrectable_error(to_cxl_memdev(dev), status, fe, hl); + writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr); + + return true; +} + +void cxl_cor_error_detected(struct pci_dev *pdev) +{ + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + struct cxl_memdev *cxlmd = cxlds->cxlmd; + struct device *dev = &cxlds->cxlmd->dev; + + scoped_guard(device, dev) { + if (!dev->driver) { + dev_warn(&pdev->dev, + "%s: memdev disabled, abort error handling\n", + dev_name(dev)); + return; + } + + if (cxlds->rcd) + cxl_handle_rdport_errors(cxlds); + + cxl_handle_cor_ras(&cxlds->cxlmd->dev, cxlmd->endpoint->regs.ras); + } +} +EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, "CXL"); + +pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + struct cxl_memdev *cxlmd = cxlds->cxlmd; + struct device *dev = &cxlmd->dev; + bool ue; + + scoped_guard(device, dev) { + if (!dev->driver) { + dev_warn(&pdev->dev, + "%s: memdev disabled, abort error handling\n", + dev_name(dev)); + return PCI_ERS_RESULT_DISCONNECT; + } + + if (cxlds->rcd) + cxl_handle_rdport_errors(cxlds); + /* + * A frozen channel indicates an impending reset which is fatal to + * CXL.mem operation, and will likely crash the system. On the off + * chance the situation is recoverable dump the status of the RAS + * capability registers and bounce the active state of the memdev. + */ + ue = cxl_handle_ras(&cxlds->cxlmd->dev, cxlmd->endpoint->regs.ras); + } + + switch (state) { + case pci_channel_io_normal: + if (ue) { + device_release_driver(dev); + return PCI_ERS_RESULT_NEED_RESET; + } + return PCI_ERS_RESULT_CAN_RECOVER; + case pci_channel_io_frozen: + dev_warn(&pdev->dev, + "%s: frozen state error detected, disable CXL.mem\n", + dev_name(dev)); + device_release_driver(dev); + return PCI_ERS_RESULT_NEED_RESET; + case pci_channel_io_perm_failure: + dev_warn(&pdev->dev, + "failure state error detected, request disconnect\n"); + return PCI_ERS_RESULT_DISCONNECT; + } + return PCI_ERS_RESULT_NEED_RESET; +} +EXPORT_SYMBOL_NS_GPL(cxl_error_detected, "CXL"); diff --git a/drivers/cxl/core/ras_rch.c b/drivers/cxl/core/ras_rch.c new file mode 100644 index 000000000000..0a8b3b9b6388 --- /dev/null +++ b/drivers/cxl/core/ras_rch.c @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2025 AMD Corporation. All rights reserved. */ + +#include <linux/types.h> +#include <linux/aer.h> +#include "cxl.h" +#include "core.h" +#include "cxlmem.h" + +void cxl_dport_map_rch_aer(struct cxl_dport *dport) +{ + resource_size_t aer_phys; + struct device *host; + u16 aer_cap; + + aer_cap = cxl_rcrb_to_aer(dport->dport_dev, dport->rcrb.base); + if (aer_cap) { + host = dport->reg_map.host; + aer_phys = aer_cap + dport->rcrb.base; + dport->regs.dport_aer = + devm_cxl_iomap_block(host, aer_phys, + sizeof(struct aer_capability_regs)); + } +} + +void cxl_disable_rch_root_ints(struct cxl_dport *dport) +{ + void __iomem *aer_base = dport->regs.dport_aer; + u32 aer_cmd_mask, aer_cmd; + + if (!aer_base) + return; + + /* + * Disable RCH root port command interrupts. + * CXL 3.0 12.2.1.1 - RCH Downstream Port-detected Errors + * + * This sequence may not be necessary. CXL spec states disabling + * the root cmd register's interrupts is required. But, PCI spec + * shows these are disabled by default on reset. + */ + aer_cmd_mask = (PCI_ERR_ROOT_CMD_COR_EN | + PCI_ERR_ROOT_CMD_NONFATAL_EN | + PCI_ERR_ROOT_CMD_FATAL_EN); + aer_cmd = readl(aer_base + PCI_ERR_ROOT_COMMAND); + aer_cmd &= ~aer_cmd_mask; + writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND); +} + +/* + * Copy the AER capability registers using 32 bit read accesses. + * This is necessary because RCRB AER capability is MMIO mapped. Clear the + * status after copying. + * + * @aer_base: base address of AER capability block in RCRB + * @aer_regs: destination for copying AER capability + */ +static bool cxl_rch_get_aer_info(void __iomem *aer_base, + struct aer_capability_regs *aer_regs) +{ + int read_cnt = sizeof(struct aer_capability_regs) / sizeof(u32); + u32 *aer_regs_buf = (u32 *)aer_regs; + int n; + + if (!aer_base) + return false; + + /* Use readl() to guarantee 32-bit accesses */ + for (n = 0; n < read_cnt; n++) + aer_regs_buf[n] = readl(aer_base + n * sizeof(u32)); + + writel(aer_regs->uncor_status, aer_base + PCI_ERR_UNCOR_STATUS); + writel(aer_regs->cor_status, aer_base + PCI_ERR_COR_STATUS); + + return true; +} + +/* Get AER severity. Return false if there is no error. */ +static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs, + int *severity) +{ + if (aer_regs->uncor_status & ~aer_regs->uncor_mask) { + if (aer_regs->uncor_status & PCI_ERR_ROOT_FATAL_RCV) + *severity = AER_FATAL; + else + *severity = AER_NONFATAL; + return true; + } + + if (aer_regs->cor_status & ~aer_regs->cor_mask) { + *severity = AER_CORRECTABLE; + return true; + } + + return false; +} + +void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) +{ + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + struct aer_capability_regs aer_regs; + struct cxl_dport *dport; + int severity; + + struct cxl_port *port __free(put_cxl_port) = + cxl_pci_find_port(pdev, &dport); + if (!port) + return; + + if (!cxl_rch_get_aer_info(dport->regs.dport_aer, &aer_regs)) + return; + + if (!cxl_rch_get_aer_severity(&aer_regs, &severity)) + return; + + pci_print_aer(pdev, severity, &aer_regs); + if (severity == AER_CORRECTABLE) + cxl_handle_cor_ras(&cxlds->cxlmd->dev, dport->regs.ras); + else + cxl_handle_ras(&cxlds->cxlmd->dev, dport->regs.ras); +} diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 5bd1213737fa..08fa3deef70a 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -489,9 +489,9 @@ static ssize_t interleave_ways_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { - struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent); - struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld; struct cxl_region *cxlr = to_cxl_region(dev); + struct cxl_root_decoder *cxlrd = cxlr->cxlrd; + struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld; struct cxl_region_params *p = &cxlr->params; unsigned int val, save; int rc; @@ -552,9 +552,9 @@ static ssize_t interleave_granularity_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { - struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent); - struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld; struct cxl_region *cxlr = to_cxl_region(dev); + struct cxl_root_decoder *cxlrd = cxlr->cxlrd; + struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld; struct cxl_region_params *p = &cxlr->params; int rc, val; u16 ig; @@ -628,7 +628,7 @@ static DEVICE_ATTR_RO(mode); static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size) { - struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); + struct cxl_root_decoder *cxlrd = cxlr->cxlrd; struct cxl_region_params *p = &cxlr->params; struct resource *res; u64 remainder = 0; @@ -664,6 +664,8 @@ static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size) return PTR_ERR(res); } + cxlr->hpa_range = DEFINE_RANGE(res->start, res->end); + p->res = res; p->state = CXL_CONFIG_INTERLEAVE_ACTIVE; @@ -700,6 +702,8 @@ static int free_hpa(struct cxl_region *cxlr) if (p->state >= CXL_CONFIG_ACTIVE) return -EBUSY; + cxlr->hpa_range = DEFINE_RANGE(0, -1); + cxl_region_iomem_release(cxlr); p->state = CXL_CONFIG_IDLE; return 0; @@ -1093,14 +1097,16 @@ static int cxl_rr_assign_decoder(struct cxl_port *port, struct cxl_region *cxlr, return 0; } -static void cxl_region_set_lock(struct cxl_region *cxlr, - struct cxl_decoder *cxld) +static void cxl_region_setup_flags(struct cxl_region *cxlr, + struct cxl_decoder *cxld) { - if (!test_bit(CXL_DECODER_F_LOCK, &cxld->flags)) - return; + if (test_bit(CXL_DECODER_F_LOCK, &cxld->flags)) { + set_bit(CXL_REGION_F_LOCK, &cxlr->flags); + clear_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags); + } - set_bit(CXL_REGION_F_LOCK, &cxlr->flags); - clear_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags); + if (test_bit(CXL_DECODER_F_NORMALIZED_ADDRESSING, &cxld->flags)) + set_bit(CXL_REGION_F_NORMALIZED_ADDRESSING, &cxlr->flags); } /** @@ -1214,7 +1220,7 @@ static int cxl_port_attach_region(struct cxl_port *port, } } - cxl_region_set_lock(cxlr, cxld); + cxl_region_setup_flags(cxlr, cxld); rc = cxl_rr_ep_add(cxl_rr, cxled); if (rc) { @@ -1373,7 +1379,7 @@ static int cxl_port_setup_targets(struct cxl_port *port, struct cxl_region *cxlr, struct cxl_endpoint_decoder *cxled) { - struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); + struct cxl_root_decoder *cxlrd = cxlr->cxlrd; int parent_iw, parent_ig, ig, iw, rc, pos = cxled->pos; struct cxl_port *parent_port = to_cxl_port(port->dev.parent); struct cxl_region_ref *cxl_rr = cxl_rr_load(port, cxlr); @@ -1731,10 +1737,10 @@ static int cxl_region_validate_position(struct cxl_region *cxlr, } static int cxl_region_attach_position(struct cxl_region *cxlr, - struct cxl_root_decoder *cxlrd, struct cxl_endpoint_decoder *cxled, const struct cxl_dport *dport, int pos) { + struct cxl_root_decoder *cxlrd = cxlr->cxlrd; struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); struct cxl_switch_decoder *cxlsd = &cxlrd->cxlsd; struct cxl_decoder *cxld = &cxlsd->cxld; @@ -1874,6 +1880,7 @@ static int find_pos_and_ways(struct cxl_port *port, struct range *range, /** * cxl_calc_interleave_pos() - calculate an endpoint position in a region * @cxled: endpoint decoder member of given region + * @hpa_range: translated HPA range of the endpoint * * The endpoint position is calculated by traversing the topology from * the endpoint to the root decoder and iteratively applying this @@ -1886,11 +1893,11 @@ static int find_pos_and_ways(struct cxl_port *port, struct range *range, * Return: position >= 0 on success * -ENXIO on failure */ -static int cxl_calc_interleave_pos(struct cxl_endpoint_decoder *cxled) +static int cxl_calc_interleave_pos(struct cxl_endpoint_decoder *cxled, + struct range *hpa_range) { struct cxl_port *iter, *port = cxled_to_port(cxled); struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); - struct range *range = &cxled->cxld.hpa_range; int parent_ways = 0, parent_pos = 0, pos = 0; int rc; @@ -1928,7 +1935,8 @@ static int cxl_calc_interleave_pos(struct cxl_endpoint_decoder *cxled) if (is_cxl_root(iter)) break; - rc = find_pos_and_ways(iter, range, &parent_pos, &parent_ways); + rc = find_pos_and_ways(iter, hpa_range, &parent_pos, + &parent_ways); if (rc) return rc; @@ -1938,7 +1946,7 @@ static int cxl_calc_interleave_pos(struct cxl_endpoint_decoder *cxled) dev_dbg(&cxlmd->dev, "decoder:%s parent:%s port:%s range:%#llx-%#llx pos:%d\n", dev_name(&cxled->cxld.dev), dev_name(cxlmd->dev.parent), - dev_name(&port->dev), range->start, range->end, pos); + dev_name(&port->dev), hpa_range->start, hpa_range->end, pos); return pos; } @@ -1951,7 +1959,7 @@ static int cxl_region_sort_targets(struct cxl_region *cxlr) for (i = 0; i < p->nr_targets; i++) { struct cxl_endpoint_decoder *cxled = p->targets[i]; - cxled->pos = cxl_calc_interleave_pos(cxled); + cxled->pos = cxl_calc_interleave_pos(cxled, &cxlr->hpa_range); /* * Record that sorting failed, but still continue to calc * cxled->pos so that follow-on code paths can reliably @@ -1971,7 +1979,7 @@ static int cxl_region_sort_targets(struct cxl_region *cxlr) static int cxl_region_attach(struct cxl_region *cxlr, struct cxl_endpoint_decoder *cxled, int pos) { - struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); + struct cxl_root_decoder *cxlrd = cxlr->cxlrd; struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); struct cxl_dev_state *cxlds = cxlmd->cxlds; struct cxl_region_params *p = &cxlr->params; @@ -2076,8 +2084,7 @@ static int cxl_region_attach(struct cxl_region *cxlr, ep_port = cxled_to_port(cxled); dport = cxl_find_dport_by_dev(root_port, ep_port->host_bridge); - rc = cxl_region_attach_position(cxlr, cxlrd, cxled, - dport, i); + rc = cxl_region_attach_position(cxlr, cxled, dport, i); if (rc) return rc; } @@ -2100,7 +2107,7 @@ static int cxl_region_attach(struct cxl_region *cxlr, if (rc) return rc; - rc = cxl_region_attach_position(cxlr, cxlrd, cxled, dport, pos); + rc = cxl_region_attach_position(cxlr, cxled, dport, pos); if (rc) return rc; @@ -2136,7 +2143,7 @@ static int cxl_region_attach(struct cxl_region *cxlr, struct cxl_endpoint_decoder *cxled = p->targets[i]; int test_pos; - test_pos = cxl_calc_interleave_pos(cxled); + test_pos = cxl_calc_interleave_pos(cxled, &cxlr->hpa_range); dev_dbg(&cxled->cxld.dev, "Test cxl_calc_interleave_pos(): %s test_pos:%d cxled->pos:%d\n", (test_pos == cxled->pos) ? "success" : "fail", @@ -2396,8 +2403,8 @@ static const struct attribute_group *region_groups[] = { static void cxl_region_release(struct device *dev) { - struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent); struct cxl_region *cxlr = to_cxl_region(dev); + struct cxl_root_decoder *cxlrd = cxlr->cxlrd; int id = atomic_read(&cxlrd->region_id); /* @@ -2454,6 +2461,8 @@ static void unregister_region(void *_cxlr) for (i = 0; i < p->interleave_ways; i++) detach_target(cxlr, i); + cxlr->hpa_range = DEFINE_RANGE(0, -1); + cxl_region_iomem_release(cxlr); put_device(&cxlr->dev); } @@ -2480,11 +2489,13 @@ static struct cxl_region *cxl_region_alloc(struct cxl_root_decoder *cxlrd, int i * region id allocations */ get_device(dev->parent); + cxlr->cxlrd = cxlrd; + cxlr->id = id; + device_set_pm_not_required(dev); dev->bus = &cxl_bus_type; dev->type = &cxl_region_type; - cxlr->id = id; - cxl_region_set_lock(cxlr, &cxlrd->cxlsd.cxld); + cxl_region_setup_flags(cxlr, &cxlrd->cxlsd.cxld); return cxlr; } @@ -3112,17 +3123,157 @@ u64 cxl_calculate_hpa_offset(u64 dpa_offset, int pos, u8 eiw, u16 eig) } EXPORT_SYMBOL_FOR_MODULES(cxl_calculate_hpa_offset, "cxl_translate"); +static int decode_pos(int region_ways, int hb_ways, int pos, int *pos_port, + int *pos_hb) +{ + int devices_per_hb; + + /* + * Decode for 3-6-12 way interleaves as defined in the CXL + * Spec 4.0 9.13.1.1 Legal Interleaving Configurations. + * Region creation should prevent invalid combinations but + * sanity check here to avoid a silent bad decode. + */ + switch (hb_ways) { + case 3: + if (region_ways != 3 && region_ways != 6 && region_ways != 12) + return -EINVAL; + break; + case 6: + if (region_ways != 6 && region_ways != 12) + return -EINVAL; + break; + case 12: + if (region_ways != 12) + return -EINVAL; + break; + default: + return -EINVAL; + } + /* + * Each host bridge contributes an equal number of endpoints + * that are laid out contiguously per host bridge. Modulo + * selects the port within a host bridge and division selects + * the host bridge position. + */ + devices_per_hb = region_ways / hb_ways; + *pos_port = pos % devices_per_hb; + *pos_hb = pos / devices_per_hb; + + return 0; +} + +/* + * restore_parent() reconstruct the address in parent + * + * This math, specifically the bitmask creation 'mask = gran - 1' relies + * on the CXL Spec requirement that interleave granularity is always a + * power of two. + * + * [mask] isolate the offset with the granularity + * [addr & ~mask] remove the offset leaving the aligned portion + * [* ways] distribute across all interleave ways + * [+ (pos * gran)] add the positional offset + * [+ (addr & mask)] restore the masked offset + */ +static u64 restore_parent(u64 addr, u64 pos, u64 gran, u64 ways) +{ + u64 mask = gran - 1; + + return ((addr & ~mask) * ways) + (pos * gran) + (addr & mask); +} + +/* + * unaligned_dpa_to_hpa() translates a DPA to HPA when the region resource + * start address is not aligned at Host Bridge Interleave Ways * 256MB. + * + * Unaligned start addresses only occur with MOD3 interleaves. All power- + * of-two interleaves are guaranteed aligned. + */ +static u64 unaligned_dpa_to_hpa(struct cxl_decoder *cxld, + struct cxl_region_params *p, int pos, u64 dpa) +{ + int ways_port = p->interleave_ways / cxld->interleave_ways; + int gran_port = p->interleave_granularity; + int gran_hb = cxld->interleave_granularity; + int ways_hb = cxld->interleave_ways; + int pos_port, pos_hb, gran_shift; + u64 hpa_port = 0; + + /* Decode an endpoint 'pos' into port and host-bridge components */ + if (decode_pos(p->interleave_ways, ways_hb, pos, &pos_port, &pos_hb)) { + dev_dbg(&cxld->dev, "not supported for region ways:%d\n", + p->interleave_ways); + return ULLONG_MAX; + } + + /* Restore the port parent address if needed */ + if (gran_hb != gran_port) + hpa_port = restore_parent(dpa, pos_port, gran_port, ways_port); + else + hpa_port = dpa; + + /* + * Complete the HPA reconstruction by restoring the address as if + * each HB position is a candidate. Test against expected pos_hb + * to confirm match. + */ + gran_shift = ilog2(gran_hb); + for (int position = 0; position < ways_hb; position++) { + u64 shifted, hpa; + + hpa = restore_parent(hpa_port, position, gran_hb, ways_hb); + hpa += p->res->start; + + shifted = hpa >> gran_shift; + if (do_div(shifted, ways_hb) == pos_hb) + return hpa; + } + + dev_dbg(&cxld->dev, "fail dpa:%#llx region:%pr pos:%d\n", dpa, p->res, + pos); + dev_dbg(&cxld->dev, " port-w/g/p:%d/%d/%d hb-w/g/p:%d/%d/%d\n", + ways_port, gran_port, pos_port, ways_hb, gran_hb, pos_hb); + + return ULLONG_MAX; +} + +static bool region_is_unaligned_mod3(struct cxl_region *cxlr) +{ + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); + struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld; + struct cxl_region_params *p = &cxlr->params; + int hbiw = cxld->interleave_ways; + u64 rem; + + if (is_power_of_2(hbiw)) + return false; + + div64_u64_rem(p->res->start, (u64)hbiw * SZ_256M, &rem); + + return (rem != 0); +} + u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, u64 dpa) { - struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); + struct cxl_root_decoder *cxlrd = cxlr->cxlrd; + struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld; struct cxl_region_params *p = &cxlr->params; struct cxl_endpoint_decoder *cxled = NULL; u64 base, dpa_offset, hpa_offset, hpa; + bool unaligned = false; u16 eig = 0; u8 eiw = 0; int pos; + /* + * Conversion between SPA and DPA is not supported in + * Normalized Address mode. + */ + if (test_bit(CXL_REGION_F_NORMALIZED_ADDRESSING, &cxlr->flags)) + return ULLONG_MAX; + for (int i = 0; i < p->nr_targets; i++) { if (cxlmd == cxled_to_memdev(p->targets[i])) { cxled = p->targets[i]; @@ -3132,21 +3283,38 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, if (!cxled) return ULLONG_MAX; - pos = cxled->pos; - ways_to_eiw(p->interleave_ways, &eiw); - granularity_to_eig(p->interleave_granularity, &eig); - base = cxl_dpa_resource_start(cxled); if (base == RESOURCE_SIZE_MAX) return ULLONG_MAX; dpa_offset = dpa - base; + + /* Unaligned calc for MOD3 interleaves not hbiw * 256MB aligned */ + unaligned = region_is_unaligned_mod3(cxlr); + if (unaligned) { + hpa = unaligned_dpa_to_hpa(cxld, p, cxled->pos, dpa_offset); + if (hpa == ULLONG_MAX) + return ULLONG_MAX; + + goto skip_aligned; + } + /* + * Aligned calc for all power-of-2 interleaves and for MOD3 + * interleaves that are aligned at hbiw * 256MB + */ + pos = cxled->pos; + ways_to_eiw(p->interleave_ways, &eiw); + granularity_to_eig(p->interleave_granularity, &eig); + hpa_offset = cxl_calculate_hpa_offset(dpa_offset, pos, eiw, eig); if (hpa_offset == ULLONG_MAX) return ULLONG_MAX; /* Apply the hpa_offset to the region base address */ - hpa = hpa_offset + p->res->start + p->cache_size; + hpa = hpa_offset + p->res->start; + +skip_aligned: + hpa += p->cache_size; /* Root decoder translation overrides typical modulo decode */ if (cxlrd->ops.hpa_to_spa) @@ -3160,9 +3328,9 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, "Addr trans fail: hpa 0x%llx not in region\n", hpa); return ULLONG_MAX; } - - /* Simple chunk check, by pos & gran, only applies to modulo decodes */ - if (!cxlrd->ops.hpa_to_spa && !cxl_is_hpa_in_chunk(hpa, cxlr, pos)) + /* Chunk check applies to aligned modulo decodes only */ + if (!unaligned && !cxlrd->ops.hpa_to_spa && + !cxl_is_hpa_in_chunk(hpa, cxlr, pos)) return ULLONG_MAX; return hpa; @@ -3173,11 +3341,54 @@ struct dpa_result { u64 dpa; }; +static int unaligned_region_offset_to_dpa_result(struct cxl_region *cxlr, + u64 offset, + struct dpa_result *result) +{ + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); + struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld; + struct cxl_region_params *p = &cxlr->params; + u64 interleave_width, interleave_index; + u64 gran, gran_offset, dpa_offset; + u64 hpa = p->res->start + offset; + u64 tmp = offset; + + /* + * Unaligned addresses are not algebraically invertible. Calculate + * a dpa_offset independent of the target device and then enumerate + * and test that dpa_offset against each candidate endpoint decoder. + */ + gran = cxld->interleave_granularity; + interleave_width = gran * cxld->interleave_ways; + interleave_index = div64_u64(offset, interleave_width); + gran_offset = do_div(tmp, gran); + + dpa_offset = interleave_index * gran + gran_offset; + + for (int i = 0; i < p->nr_targets; i++) { + struct cxl_endpoint_decoder *cxled = p->targets[i]; + int pos = cxled->pos; + u64 test_hpa; + + test_hpa = unaligned_dpa_to_hpa(cxld, p, pos, dpa_offset); + if (test_hpa == hpa) { + result->cxlmd = cxled_to_memdev(cxled); + result->dpa = + cxl_dpa_resource_start(cxled) + dpa_offset; + return 0; + } + } + dev_err(&cxlr->dev, + "failed to resolve HPA %#llx in unaligned MOD3 region\n", hpa); + + return -ENXIO; +} + static int region_offset_to_dpa_result(struct cxl_region *cxlr, u64 offset, struct dpa_result *result) { struct cxl_region_params *p = &cxlr->params; - struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); + struct cxl_root_decoder *cxlrd = cxlr->cxlrd; struct cxl_endpoint_decoder *cxled; u64 hpa_offset = offset; u64 dpa, dpa_offset; @@ -3206,6 +3417,10 @@ static int region_offset_to_dpa_result(struct cxl_region *cxlr, u64 offset, hpa_offset -= p->res->start; } + if (region_is_unaligned_mod3(cxlr)) + return unaligned_region_offset_to_dpa_result(cxlr, offset, + result); + pos = cxl_calculate_position(hpa_offset, eiw, eig); if (pos < 0 || pos >= p->nr_targets) { dev_dbg(&cxlr->dev, "Invalid position %d for %d targets\n", @@ -3478,47 +3693,68 @@ err: return rc; } -static int match_decoder_by_range(struct device *dev, const void *data) +static int match_root_decoder(struct device *dev, const void *data) { const struct range *r1, *r2 = data; - struct cxl_decoder *cxld; + struct cxl_root_decoder *cxlrd; - if (!is_switch_decoder(dev)) + if (!is_root_decoder(dev)) return 0; - cxld = to_cxl_decoder(dev); - r1 = &cxld->hpa_range; + cxlrd = to_cxl_root_decoder(dev); + r1 = &cxlrd->cxlsd.cxld.hpa_range; + return range_contains(r1, r2); } -static struct cxl_decoder * -cxl_port_find_switch_decoder(struct cxl_port *port, struct range *hpa) +static int cxl_root_setup_translation(struct cxl_root *cxl_root, + struct cxl_region_context *ctx) { - struct device *cxld_dev = device_find_child(&port->dev, hpa, - match_decoder_by_range); + if (!cxl_root->ops.translation_setup_root) + return 0; - return cxld_dev ? to_cxl_decoder(cxld_dev) : NULL; + return cxl_root->ops.translation_setup_root(cxl_root, ctx); } +/* + * Note, when finished with the device, drop the reference with + * put_device() or use the put_cxl_root_decoder helper. + */ static struct cxl_root_decoder * -cxl_find_root_decoder(struct cxl_endpoint_decoder *cxled) +get_cxl_root_decoder(struct cxl_endpoint_decoder *cxled, + struct cxl_region_context *ctx) { struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); struct cxl_port *port = cxled_to_port(cxled); struct cxl_root *cxl_root __free(put_cxl_root) = find_cxl_root(port); - struct cxl_decoder *root, *cxld = &cxled->cxld; - struct range *hpa = &cxld->hpa_range; + struct device *cxlrd_dev; + int rc; + + /* + * Adjust the endpoint's HPA range and interleaving + * configuration to the root decoder’s memory space before + * setting up the root decoder. + */ + rc = cxl_root_setup_translation(cxl_root, ctx); + if (rc) { + dev_err(cxlmd->dev.parent, + "%s:%s Failed to setup translation for address range %#llx:%#llx\n", + dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), + ctx->hpa_range.start, ctx->hpa_range.end); + return ERR_PTR(rc); + } - root = cxl_port_find_switch_decoder(&cxl_root->port, hpa); - if (!root) { + cxlrd_dev = device_find_child(&cxl_root->port.dev, &ctx->hpa_range, + match_root_decoder); + if (!cxlrd_dev) { dev_err(cxlmd->dev.parent, "%s:%s no CXL window for range %#llx:%#llx\n", - dev_name(&cxlmd->dev), dev_name(&cxld->dev), - cxld->hpa_range.start, cxld->hpa_range.end); - return NULL; + dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), + ctx->hpa_range.start, ctx->hpa_range.end); + return ERR_PTR(-ENXIO); } - return to_cxl_root_decoder(&root->dev); + return to_cxl_root_decoder(cxlrd_dev); } static int match_region_by_range(struct device *dev, const void *data) @@ -3540,7 +3776,7 @@ static int match_region_by_range(struct device *dev, const void *data) static int cxl_extended_linear_cache_resize(struct cxl_region *cxlr, struct resource *res) { - struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); + struct cxl_root_decoder *cxlrd = cxlr->cxlrd; struct cxl_region_params *p = &cxlr->params; resource_size_t size = resource_size(res); resource_size_t cache_size, start; @@ -3576,11 +3812,12 @@ static int cxl_extended_linear_cache_resize(struct cxl_region *cxlr, } static int __construct_region(struct cxl_region *cxlr, - struct cxl_root_decoder *cxlrd, - struct cxl_endpoint_decoder *cxled) + struct cxl_region_context *ctx) { + struct cxl_endpoint_decoder *cxled = ctx->cxled; + struct cxl_root_decoder *cxlrd = cxlr->cxlrd; struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); - struct range *hpa = &cxled->cxld.hpa_range; + struct range *hpa_range = &ctx->hpa_range; struct cxl_region_params *p; struct resource *res; int rc; @@ -3596,12 +3833,13 @@ static int __construct_region(struct cxl_region *cxlr, } set_bit(CXL_REGION_F_AUTO, &cxlr->flags); + cxlr->hpa_range = *hpa_range; res = kmalloc(sizeof(*res), GFP_KERNEL); if (!res) return -ENOMEM; - *res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa), + *res = DEFINE_RES_MEM_NAMED(hpa_range->start, range_len(hpa_range), dev_name(&cxlr->dev)); rc = cxl_extended_linear_cache_resize(cxlr, res); @@ -3632,8 +3870,8 @@ static int __construct_region(struct cxl_region *cxlr, } p->res = res; - p->interleave_ways = cxled->cxld.interleave_ways; - p->interleave_granularity = cxled->cxld.interleave_granularity; + p->interleave_ways = ctx->interleave_ways; + p->interleave_granularity = ctx->interleave_granularity; p->state = CXL_CONFIG_INTERLEAVE_ACTIVE; rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group()); @@ -3653,8 +3891,9 @@ static int __construct_region(struct cxl_region *cxlr, /* Establish an empty region covering the given HPA range */ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, - struct cxl_endpoint_decoder *cxled) + struct cxl_region_context *ctx) { + struct cxl_endpoint_decoder *cxled = ctx->cxled; struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); struct cxl_port *port = cxlrd_to_port(cxlrd); struct cxl_dev_state *cxlds = cxlmd->cxlds; @@ -3674,7 +3913,7 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, return cxlr; } - rc = __construct_region(cxlr, cxlrd, cxled); + rc = __construct_region(cxlr, ctx); if (rc) { devm_release_action(port->uport_dev, unregister_region, cxlr); return ERR_PTR(rc); @@ -3684,11 +3923,12 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, } static struct cxl_region * -cxl_find_region_by_range(struct cxl_root_decoder *cxlrd, struct range *hpa) +cxl_find_region_by_range(struct cxl_root_decoder *cxlrd, + struct range *hpa_range) { struct device *region_dev; - region_dev = device_find_child(&cxlrd->cxlsd.cxld.dev, hpa, + region_dev = device_find_child(&cxlrd->cxlsd.cxld.dev, hpa_range, match_region_by_range); if (!region_dev) return NULL; @@ -3698,25 +3938,34 @@ cxl_find_region_by_range(struct cxl_root_decoder *cxlrd, struct range *hpa) int cxl_add_to_region(struct cxl_endpoint_decoder *cxled) { - struct range *hpa = &cxled->cxld.hpa_range; + struct cxl_region_context ctx; struct cxl_region_params *p; bool attach = false; int rc; + ctx = (struct cxl_region_context) { + .cxled = cxled, + .hpa_range = cxled->cxld.hpa_range, + .interleave_ways = cxled->cxld.interleave_ways, + .interleave_granularity = cxled->cxld.interleave_granularity, + }; + struct cxl_root_decoder *cxlrd __free(put_cxl_root_decoder) = - cxl_find_root_decoder(cxled); - if (!cxlrd) - return -ENXIO; + get_cxl_root_decoder(cxled, &ctx); + + if (IS_ERR(cxlrd)) + return PTR_ERR(cxlrd); /* - * Ensure that if multiple threads race to construct_region() for @hpa - * one does the construction and the others add to that. + * Ensure that, if multiple threads race to construct_region() + * for the HPA range, one does the construction and the others + * add to that. */ mutex_lock(&cxlrd->range_lock); struct cxl_region *cxlr __free(put_cxl_region) = - cxl_find_region_by_range(cxlrd, hpa); + cxl_find_region_by_range(cxlrd, &ctx.hpa_range); if (!cxlr) - cxlr = construct_region(cxlrd, cxled); + cxlr = construct_region(cxlrd, &ctx); mutex_unlock(&cxlrd->range_lock); rc = PTR_ERR_OR_ZERO(cxlr); @@ -3891,6 +4140,39 @@ static int cxl_region_debugfs_poison_clear(void *data, u64 offset) DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_clear_fops, NULL, cxl_region_debugfs_poison_clear, "%llx\n"); +static int cxl_region_setup_poison(struct cxl_region *cxlr) +{ + struct device *dev = &cxlr->dev; + struct cxl_region_params *p = &cxlr->params; + struct dentry *dentry; + + /* + * Do not enable poison injection in Normalized Address mode. + * Conversion between SPA and DPA is required for this, but it is + * not supported in this mode. + */ + if (test_bit(CXL_REGION_F_NORMALIZED_ADDRESSING, &cxlr->flags)) + return 0; + + /* Create poison attributes if all memdevs support the capabilities */ + for (int i = 0; i < p->nr_targets; i++) { + struct cxl_endpoint_decoder *cxled = p->targets[i]; + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + + if (!cxl_memdev_has_poison_cmd(cxlmd, CXL_POISON_ENABLED_INJECT) || + !cxl_memdev_has_poison_cmd(cxlmd, CXL_POISON_ENABLED_CLEAR)) + return 0; + } + + dentry = cxl_debugfs_create_dir(dev_name(dev)); + debugfs_create_file("inject_poison", 0200, dentry, cxlr, + &cxl_poison_inject_fops); + debugfs_create_file("clear_poison", 0200, dentry, cxlr, + &cxl_poison_clear_fops); + + return devm_add_action_or_reset(dev, remove_debugfs, dentry); +} + static int cxl_region_can_probe(struct cxl_region *cxlr) { struct cxl_region_params *p = &cxlr->params; @@ -3920,7 +4202,6 @@ static int cxl_region_probe(struct device *dev) { struct cxl_region *cxlr = to_cxl_region(dev); struct cxl_region_params *p = &cxlr->params; - bool poison_supported = true; int rc; rc = cxl_region_can_probe(cxlr); @@ -3944,30 +4225,9 @@ static int cxl_region_probe(struct device *dev) if (rc) return rc; - /* Create poison attributes if all memdevs support the capabilities */ - for (int i = 0; i < p->nr_targets; i++) { - struct cxl_endpoint_decoder *cxled = p->targets[i]; - struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); - - if (!cxl_memdev_has_poison_cmd(cxlmd, CXL_POISON_ENABLED_INJECT) || - !cxl_memdev_has_poison_cmd(cxlmd, CXL_POISON_ENABLED_CLEAR)) { - poison_supported = false; - break; - } - } - - if (poison_supported) { - struct dentry *dentry; - - dentry = cxl_debugfs_create_dir(dev_name(dev)); - debugfs_create_file("inject_poison", 0200, dentry, cxlr, - &cxl_poison_inject_fops); - debugfs_create_file("clear_poison", 0200, dentry, cxlr, - &cxl_poison_clear_fops); - rc = devm_add_action_or_reset(dev, remove_debugfs, dentry); - if (rc) - return rc; - } + rc = cxl_region_setup_poison(cxlr); + if (rc) + return rc; switch (cxlr->mode) { case CXL_PARTMODE_PMEM: diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c index 5ca7b0eed568..a010b3214342 100644 --- a/drivers/cxl/core/regs.c +++ b/drivers/cxl/core/regs.c @@ -271,10 +271,10 @@ EXPORT_SYMBOL_NS_GPL(cxl_map_device_regs, "CXL"); static bool cxl_decode_regblock(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi, struct cxl_register_map *map) { - u8 reg_type = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK, reg_lo); - int bar = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BIR_MASK, reg_lo); + u8 reg_type = FIELD_GET(PCI_DVSEC_CXL_REG_LOCATOR_BLOCK_ID, reg_lo); + int bar = FIELD_GET(PCI_DVSEC_CXL_REG_LOCATOR_BIR, reg_lo); u64 offset = ((u64)reg_hi << 32) | - (reg_lo & CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK); + (reg_lo & PCI_DVSEC_CXL_REG_LOCATOR_BLOCK_OFF_LOW); if (offset > pci_resource_len(pdev, bar)) { dev_warn(&pdev->dev, @@ -311,15 +311,15 @@ static int __cxl_find_regblock_instance(struct pci_dev *pdev, enum cxl_regloc_ty }; regloc = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL, - CXL_DVSEC_REG_LOCATOR); + PCI_DVSEC_CXL_REG_LOCATOR); if (!regloc) return -ENXIO; pci_read_config_dword(pdev, regloc + PCI_DVSEC_HEADER1, ®loc_size); - regloc_size = FIELD_GET(PCI_DVSEC_HEADER1_LENGTH_MASK, regloc_size); + regloc_size = PCI_DVSEC_HEADER1_LEN(regloc_size); - regloc += CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET; - regblocks = (regloc_size - CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET) / 8; + regloc += PCI_DVSEC_CXL_REG_LOCATOR_BLOCK1; + regblocks = (regloc_size - PCI_DVSEC_CXL_REG_LOCATOR_BLOCK1) / 8; for (i = 0; i < regblocks; i++, regloc += 8) { u32 reg_lo, reg_hi; diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index ba17fa86d249..04c673e7cdb0 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -332,7 +332,7 @@ int cxl_dport_map_rcd_linkcap(struct pci_dev *pdev, struct cxl_dport *dport); #define CXL_DECODER_F_TYPE3 BIT(3) #define CXL_DECODER_F_LOCK BIT(4) #define CXL_DECODER_F_ENABLE BIT(5) -#define CXL_DECODER_F_MASK GENMASK(5, 0) +#define CXL_DECODER_F_NORMALIZED_ADDRESSING BIT(6) enum cxl_decoder_type { CXL_DECODER_DEVMEM = 2, @@ -525,10 +525,19 @@ enum cxl_partition_mode { */ #define CXL_REGION_F_LOCK 2 +/* + * Indicate Normalized Addressing. Use it to disable SPA conversion if + * HPA != SPA and an address translation callback handler does not + * exist. Flag is needed by AMD Zen5 platforms. + */ +#define CXL_REGION_F_NORMALIZED_ADDRESSING 3 + /** * struct cxl_region - CXL region * @dev: This region's device * @id: This region's id. Id is globally unique across all regions + * @cxlrd: Region's root decoder + * @hpa_range: Address range occupied by the region * @mode: Operational mode of the mapped capacity * @type: Endpoint decoder target type * @cxl_nvb: nvdimm bridge for coordinating @cxlr_pmem setup / shutdown @@ -542,6 +551,8 @@ enum cxl_partition_mode { struct cxl_region { struct device dev; int id; + struct cxl_root_decoder *cxlrd; + struct range hpa_range; enum cxl_partition_mode mode; enum cxl_decoder_type type; struct cxl_nvdimm_bridge *cxl_nvb; @@ -607,6 +618,7 @@ struct cxl_dax_region { * @parent_dport: dport that points to this port in the parent * @decoder_ida: allocator for decoder ids * @reg_map: component and ras register mapping parameters + * @regs: mapped component registers * @nr_dports: number of entries in @dports * @hdm_end: track last allocated HDM decoder instance for allocation ordering * @commit_end: cursor to track highest committed decoder for commit ordering @@ -628,6 +640,7 @@ struct cxl_port { struct cxl_dport *parent_dport; struct ida decoder_ida; struct cxl_register_map reg_map; + struct cxl_component_regs regs; int nr_dports; int hdm_end; int commit_end; @@ -642,6 +655,15 @@ struct cxl_port { resource_size_t component_reg_phys; }; +struct cxl_root; + +struct cxl_root_ops { + int (*qos_class)(struct cxl_root *cxl_root, + struct access_coordinate *coord, int entries, + int *qos_class); + int (*translation_setup_root)(struct cxl_root *cxl_root, void *data); +}; + /** * struct cxl_root - logical collection of root cxl_port items * @@ -650,7 +672,7 @@ struct cxl_port { */ struct cxl_root { struct cxl_port port; - const struct cxl_root_ops *ops; + struct cxl_root_ops ops; }; static inline struct cxl_root * @@ -659,12 +681,6 @@ to_cxl_root(const struct cxl_port *port) return container_of(port, struct cxl_root, port); } -struct cxl_root_ops { - int (*qos_class)(struct cxl_root *cxl_root, - struct access_coordinate *coord, int entries, - int *qos_class); -}; - static inline struct cxl_dport * cxl_find_dport_by_dev(struct cxl_port *port, const struct device *dport_dev) { @@ -778,8 +794,9 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport_dev, resource_size_t component_reg_phys, struct cxl_dport *parent_dport); -struct cxl_root *devm_cxl_add_root(struct device *host, - const struct cxl_root_ops *ops); +struct cxl_root *devm_cxl_add_root(struct device *host); +int devm_cxl_add_endpoint(struct device *host, struct cxl_memdev *cxlmd, + struct cxl_dport *parent_dport); struct cxl_root *find_cxl_root(struct cxl_port *port); DEFINE_FREE(put_cxl_root, struct cxl_root *, if (_T) put_device(&_T->port.dev)) @@ -803,12 +820,11 @@ struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port, struct device *dport_dev, int port_id, resource_size_t rcrb); -#ifdef CONFIG_PCIEAER_CXL -void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport); -void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host); +#ifdef CONFIG_CXL_ATL +void cxl_setup_prm_address_translation(struct cxl_root *cxl_root); #else -static inline void cxl_dport_init_ras_reporting(struct cxl_dport *dport, - struct device *host) { } +static inline +void cxl_setup_prm_address_translation(struct cxl_root *cxl_root) {} #endif struct cxl_decoder *to_cxl_decoder(struct device *dev); @@ -848,8 +864,11 @@ struct cxl_endpoint_dvsec_info { }; int devm_cxl_switch_port_decoders_setup(struct cxl_port *port); -int __devm_cxl_switch_port_decoders_setup(struct cxl_port *port); int devm_cxl_endpoint_decoders_setup(struct cxl_port *port); +void cxl_port_update_decoder_targets(struct cxl_port *port, + struct cxl_dport *dport); +int cxl_port_setup_regs(struct cxl_port *port, + resource_size_t component_reg_phys); struct cxl_dev_state; int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, @@ -859,10 +878,18 @@ bool is_cxl_region(struct device *dev); extern const struct bus_type cxl_bus_type; +/* + * Note, add_dport() is expressly for the cxl_port driver. TODO: investigate a + * type-safe driver model where probe()/remove() take the type of object implied + * by @id and the add_dport() op only defined for the CXL_DEVICE_PORT driver + * template. + */ struct cxl_driver { const char *name; int (*probe)(struct device *dev); void (*remove)(struct device *dev); + struct cxl_dport *(*add_dport)(struct cxl_port *port, + struct device *dport_dev); struct device_driver drv; int id; }; @@ -895,7 +922,8 @@ struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host, struct cxl_port *port); struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev); bool is_cxl_nvdimm(struct device *dev); -int devm_cxl_add_nvdimm(struct cxl_port *parent_port, struct cxl_memdev *cxlmd); +int devm_cxl_add_nvdimm(struct device *host, struct cxl_port *port, + struct cxl_memdev *cxlmd); struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_port *port); #ifdef CONFIG_CXL_REGION @@ -946,8 +974,6 @@ void cxl_coordinates_combine(struct access_coordinate *out, bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port); struct cxl_dport *devm_cxl_add_dport_by_dev(struct cxl_port *port, struct device *dport_dev); -struct cxl_dport *__devm_cxl_add_dport_by_dev(struct cxl_port *port, - struct device *dport_dev); /* * Unit test builds overrides this to __weak, find the 'strong' version @@ -959,20 +985,4 @@ struct cxl_dport *__devm_cxl_add_dport_by_dev(struct cxl_port *port, u16 cxl_gpf_get_dvsec(struct device *dev); -/* - * Declaration for functions that are mocked by cxl_test that are called by - * cxl_core. The respective functions are defined as __foo() and called by - * cxl_core as foo(). The macros below ensures that those functions would - * exist as foo(). See tools/testing/cxl/cxl_core_exports.c and - * tools/testing/cxl/exports.h for setting up the mock functions. The dance - * is done to avoid a circular dependency where cxl_core calls a function that - * ends up being a mock function and goes to * cxl_test where it calls a - * cxl_core function. - */ -#ifndef CXL_TEST_ENABLE -#define DECLARE_TESTABLE(x) __##x -#define devm_cxl_add_dport_by_dev DECLARE_TESTABLE(devm_cxl_add_dport_by_dev) -#define devm_cxl_switch_port_decoders_setup DECLARE_TESTABLE(devm_cxl_switch_port_decoders_setup) -#endif - #endif /* __CXL_H__ */ diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 434031a0c1f7..e21d744d639b 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -34,6 +34,10 @@ (FIELD_GET(CXLMDEV_RESET_NEEDED_MASK, status) != \ CXLMDEV_RESET_NEEDED_NOT) +struct cxl_memdev_attach { + int (*probe)(struct cxl_memdev *cxlmd); +}; + /** * struct cxl_memdev - CXL bus object representing a Type-3 Memory Device * @dev: driver core device object @@ -43,6 +47,7 @@ * @cxl_nvb: coordinate removal of @cxl_nvd if present * @cxl_nvd: optional bridge to an nvdimm if the device supports pmem * @endpoint: connection to the CXL port topology for this memory device + * @attach: creator of this memdev depends on CXL link attach to operate * @id: id number of this memdev instance. * @depth: endpoint port depth * @scrub_cycle: current scrub cycle set for this device @@ -59,11 +64,12 @@ struct cxl_memdev { struct cxl_nvdimm_bridge *cxl_nvb; struct cxl_nvdimm *cxl_nvd; struct cxl_port *endpoint; + const struct cxl_memdev_attach *attach; int id; int depth; u8 scrub_cycle; int scrub_region_id; - void *err_rec_array; + struct cxl_mem_err_rec *err_rec_array; }; static inline struct cxl_memdev *to_cxl_memdev(struct device *dev) @@ -95,8 +101,10 @@ static inline bool is_cxl_endpoint(struct cxl_port *port) return is_cxl_memdev(port->uport_dev); } -struct cxl_memdev *devm_cxl_add_memdev(struct device *host, - struct cxl_dev_state *cxlds); +struct cxl_memdev *__devm_cxl_add_memdev(struct cxl_dev_state *cxlds, + const struct cxl_memdev_attach *attach); +struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds, + const struct cxl_memdev_attach *attach); int devm_cxl_sanitize_setup_notifier(struct device *host, struct cxl_memdev *cxlmd); struct cxl_memdev_state; @@ -415,7 +423,7 @@ struct cxl_dpa_partition { * @dev: The device associated with this CXL state * @cxlmd: The device representing the CXL.mem capabilities of @dev * @reg_map: component and ras register mapping parameters - * @regs: Parsed register blocks + * @regs: Class device "Device" registers * @cxl_dvsec: Offset to the PCIe device DVSEC * @rcd: operating in RCD mode (CXL 3.0 9.11.8 CXL Devices Attached to an RCH) * @media_ready: Indicate whether the device media is usable @@ -431,7 +439,7 @@ struct cxl_dev_state { struct device *dev; struct cxl_memdev *cxlmd; struct cxl_register_map reg_map; - struct cxl_regs regs; + struct cxl_device_regs regs; int cxl_dvsec; bool rcd; bool media_ready; @@ -877,7 +885,6 @@ int devm_cxl_memdev_edac_register(struct cxl_memdev *cxlmd); int devm_cxl_region_edac_register(struct cxl_region *cxlr); int cxl_store_rec_gen_media(struct cxl_memdev *cxlmd, union cxl_event *evt); int cxl_store_rec_dram(struct cxl_memdev *cxlmd, union cxl_event *evt); -void devm_cxl_memdev_edac_release(struct cxl_memdev *cxlmd); #else static inline int devm_cxl_memdev_edac_register(struct cxl_memdev *cxlmd) { return 0; } @@ -889,8 +896,6 @@ static inline int cxl_store_rec_gen_media(struct cxl_memdev *cxlmd, static inline int cxl_store_rec_dram(struct cxl_memdev *cxlmd, union cxl_event *evt) { return 0; } -static inline void devm_cxl_memdev_edac_release(struct cxl_memdev *cxlmd) -{ return; } #endif #ifdef CONFIG_CXL_SUSPEND diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 1d526bea8431..0cf64218aa16 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -8,59 +8,6 @@ #define CXL_MEMORY_PROGIF 0x10 /* - * See section 8.1 Configuration Space Registers in the CXL 2.0 - * Specification. Names are taken straight from the specification with "CXL" and - * "DVSEC" redundancies removed. When obvious, abbreviations may be used. - */ -#define PCI_DVSEC_HEADER1_LENGTH_MASK GENMASK(31, 20) - -/* CXL 2.0 8.1.3: PCIe DVSEC for CXL Device */ -#define CXL_DVSEC_PCIE_DEVICE 0 -#define CXL_DVSEC_CAP_OFFSET 0xA -#define CXL_DVSEC_MEM_CAPABLE BIT(2) -#define CXL_DVSEC_HDM_COUNT_MASK GENMASK(5, 4) -#define CXL_DVSEC_CTRL_OFFSET 0xC -#define CXL_DVSEC_MEM_ENABLE BIT(2) -#define CXL_DVSEC_RANGE_SIZE_HIGH(i) (0x18 + (i * 0x10)) -#define CXL_DVSEC_RANGE_SIZE_LOW(i) (0x1C + (i * 0x10)) -#define CXL_DVSEC_MEM_INFO_VALID BIT(0) -#define CXL_DVSEC_MEM_ACTIVE BIT(1) -#define CXL_DVSEC_MEM_SIZE_LOW_MASK GENMASK(31, 28) -#define CXL_DVSEC_RANGE_BASE_HIGH(i) (0x20 + (i * 0x10)) -#define CXL_DVSEC_RANGE_BASE_LOW(i) (0x24 + (i * 0x10)) -#define CXL_DVSEC_MEM_BASE_LOW_MASK GENMASK(31, 28) - -#define CXL_DVSEC_RANGE_MAX 2 - -/* CXL 2.0 8.1.4: Non-CXL Function Map DVSEC */ -#define CXL_DVSEC_FUNCTION_MAP 2 - -/* CXL 2.0 8.1.5: CXL 2.0 Extensions DVSEC for Ports */ -#define CXL_DVSEC_PORT_EXTENSIONS 3 - -/* CXL 2.0 8.1.6: GPF DVSEC for CXL Port */ -#define CXL_DVSEC_PORT_GPF 4 -#define CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET 0x0C -#define CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK GENMASK(3, 0) -#define CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK GENMASK(11, 8) -#define CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET 0xE -#define CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK GENMASK(3, 0) -#define CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK GENMASK(11, 8) - -/* CXL 2.0 8.1.7: GPF DVSEC for CXL Device */ -#define CXL_DVSEC_DEVICE_GPF 5 - -/* CXL 2.0 8.1.8: PCIe DVSEC for Flex Bus Port */ -#define CXL_DVSEC_PCIE_FLEXBUS_PORT 7 - -/* CXL 2.0 8.1.9: Register Locator DVSEC */ -#define CXL_DVSEC_REG_LOCATOR 8 -#define CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET 0xC -#define CXL_DVSEC_REG_LOCATOR_BIR_MASK GENMASK(2, 0) -#define CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK GENMASK(15, 8) -#define CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK GENMASK(31, 16) - -/* * NOTE: Currently all the functions which are enabled for CXL require their * vectors to be in the first 16. Use this as the default max. */ @@ -129,7 +76,29 @@ static inline bool cxl_pci_flit_256(struct pci_dev *pdev) struct cxl_dev_state; void read_cdat_data(struct cxl_port *port); + +#ifdef CONFIG_CXL_RAS void cxl_cor_error_detected(struct pci_dev *pdev); pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, pci_channel_state_t state); +void devm_cxl_dport_rch_ras_setup(struct cxl_dport *dport); +void devm_cxl_port_ras_setup(struct cxl_port *port); +#else +static inline void cxl_cor_error_detected(struct pci_dev *pdev) { } + +static inline pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + return PCI_ERS_RESULT_NONE; +} + +static inline void devm_cxl_dport_rch_ras_setup(struct cxl_dport *dport) +{ +} + +static inline void devm_cxl_port_ras_setup(struct cxl_port *port) +{ +} +#endif + #endif /* __CXL_PCI_H__ */ diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 6e6777b7bafb..fcffe24dcb42 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -45,44 +45,6 @@ static int cxl_mem_dpa_show(struct seq_file *file, void *data) return 0; } -static int devm_cxl_add_endpoint(struct device *host, struct cxl_memdev *cxlmd, - struct cxl_dport *parent_dport) -{ - struct cxl_port *parent_port = parent_dport->port; - struct cxl_port *endpoint, *iter, *down; - int rc; - - /* - * Now that the path to the root is established record all the - * intervening ports in the chain. - */ - for (iter = parent_port, down = NULL; !is_cxl_root(iter); - down = iter, iter = to_cxl_port(iter->dev.parent)) { - struct cxl_ep *ep; - - ep = cxl_ep_load(iter, cxlmd); - ep->next = down; - } - - /* Note: endpoint port component registers are derived from @cxlds */ - endpoint = devm_cxl_add_port(host, &cxlmd->dev, CXL_RESOURCE_NONE, - parent_dport); - if (IS_ERR(endpoint)) - return PTR_ERR(endpoint); - - rc = cxl_endpoint_autoremove(cxlmd, endpoint); - if (rc) - return rc; - - if (!endpoint->dev.driver) { - dev_err(&cxlmd->dev, "%s failed probe\n", - dev_name(&endpoint->dev)); - return -ENXIO; - } - - return 0; -} - static int cxl_debugfs_poison_inject(void *data, u64 dpa) { struct cxl_memdev *cxlmd = data; @@ -153,7 +115,7 @@ static int cxl_mem_probe(struct device *dev) } if (cxl_pmem_size(cxlds) && IS_ENABLED(CONFIG_CXL_PMEM)) { - rc = devm_cxl_add_nvdimm(parent_port, cxlmd); + rc = devm_cxl_add_nvdimm(dev, parent_port, cxlmd); if (rc) { if (rc == -ENODEV) dev_info(dev, "PMEM disabled by platform\n"); @@ -166,8 +128,6 @@ static int cxl_mem_probe(struct device *dev) else endpoint_parent = &parent_port->dev; - cxl_dport_init_ras_reporting(dport, dev); - scoped_guard(device, endpoint_parent) { if (!endpoint_parent->driver) { dev_err(dev, "CXL port topology %s not enabled\n", @@ -180,6 +140,12 @@ static int cxl_mem_probe(struct device *dev) return rc; } + if (cxlmd->attach) { + rc = cxlmd->attach->probe(cxlmd); + if (rc) + return rc; + } + rc = devm_cxl_memdev_edac_register(cxlmd); if (rc) dev_dbg(dev, "CXL memdev EDAC registration failed rc=%d\n", rc); @@ -201,6 +167,29 @@ static int cxl_mem_probe(struct device *dev) return devm_add_action_or_reset(dev, enable_suspend, NULL); } +/** + * devm_cxl_add_memdev - Add a CXL memory device + * @cxlds: CXL device state to associate with the memdev + * @attach: Caller depends on CXL topology attachment + * + * Upon return the device will have had a chance to attach to the + * cxl_mem driver, but may fail to attach if the CXL topology is not ready + * (hardware CXL link down, or software platform CXL root not attached). + * + * When @attach is NULL it indicates the caller wants the memdev to remain + * registered even if it does not immediately attach to the CXL hierarchy. When + * @attach is provided a cxl_mem_probe() failure leads to failure of this routine. + * + * The parent of the resulting device and the devm context for allocations is + * @cxlds->dev. + */ +struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds, + const struct cxl_memdev_attach *attach) +{ + return __devm_cxl_add_memdev(cxlds, attach); +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, "CXL"); + static ssize_t trigger_poison_list_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) @@ -248,6 +237,7 @@ static struct cxl_driver cxl_mem_driver = { .probe = cxl_mem_probe, .id = CXL_DEVICE_MEMORY_EXPANDER, .drv = { + .probe_type = PROBE_FORCE_SYNCHRONOUS, .dev_groups = cxl_mem_groups, }, }; @@ -258,8 +248,3 @@ MODULE_DESCRIPTION("CXL: Memory Expansion"); MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS("CXL"); MODULE_ALIAS_CXL(CXL_DEVICE_MEMORY_EXPANDER); -/* - * create_endpoint() wants to validate port driver attach immediately after - * endpoint registration. - */ -MODULE_SOFTDEP("pre: cxl_port"); diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 0be4e508affe..fbb300a01830 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -535,52 +535,6 @@ static int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, return cxl_setup_regs(map); } -static int cxl_pci_ras_unmask(struct pci_dev *pdev) -{ - struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); - void __iomem *addr; - u32 orig_val, val, mask; - u16 cap; - int rc; - - if (!cxlds->regs.ras) { - dev_dbg(&pdev->dev, "No RAS registers.\n"); - return 0; - } - - /* BIOS has PCIe AER error control */ - if (!pcie_aer_is_native(pdev)) - return 0; - - rc = pcie_capability_read_word(pdev, PCI_EXP_DEVCTL, &cap); - if (rc) - return rc; - - if (cap & PCI_EXP_DEVCTL_URRE) { - addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_MASK_OFFSET; - orig_val = readl(addr); - - mask = CXL_RAS_UNCORRECTABLE_MASK_MASK | - CXL_RAS_UNCORRECTABLE_MASK_F256B_MASK; - val = orig_val & ~mask; - writel(val, addr); - dev_dbg(&pdev->dev, - "Uncorrectable RAS Errors Mask: %#x -> %#x\n", - orig_val, val); - } - - if (cap & PCI_EXP_DEVCTL_CERE) { - addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_MASK_OFFSET; - orig_val = readl(addr); - val = orig_val & ~CXL_RAS_CORRECTABLE_MASK_MASK; - writel(val, addr); - dev_dbg(&pdev->dev, "Correctable RAS Errors Mask: %#x -> %#x\n", - orig_val, val); - } - - return 0; -} - static void free_event_buf(void *buf) { kvfree(buf); @@ -912,13 +866,6 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) unsigned int i; bool irq_avail; - /* - * Double check the anonymous union trickery in struct cxl_regs - * FIXME switch to struct_group() - */ - BUILD_BUG_ON(offsetof(struct cxl_regs, memdev) != - offsetof(struct cxl_regs, device_regs.memdev)); - rc = pcim_enable_device(pdev); if (rc) return rc; @@ -933,7 +880,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) cxlds->rcd = is_cxl_restricted(pdev); cxlds->serial = pci_get_dsn(pdev); cxlds->cxl_dvsec = pci_find_dvsec_capability( - pdev, PCI_VENDOR_ID_CXL, CXL_DVSEC_PCIE_DEVICE); + pdev, PCI_VENDOR_ID_CXL, PCI_DVSEC_CXL_DEVICE); if (!cxlds->cxl_dvsec) dev_warn(&pdev->dev, "Device DVSEC not present, skip CXL.mem init\n"); @@ -942,7 +889,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) return rc; - rc = cxl_map_device_regs(&map, &cxlds->regs.device_regs); + rc = cxl_map_device_regs(&map, &cxlds->regs); if (rc) return rc; @@ -957,11 +904,6 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) else if (!cxlds->reg_map.component_map.ras.valid) dev_dbg(&pdev->dev, "RAS registers not found\n"); - rc = cxl_map_component_regs(&cxlds->reg_map, &cxlds->regs.component, - BIT(CXL_CM_CAP_CAP_ID_RAS)); - if (rc) - dev_dbg(&pdev->dev, "Failed to map RAS capability.\n"); - rc = cxl_pci_type3_init_mailbox(cxlds); if (rc) return rc; @@ -1006,7 +948,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) dev_dbg(&pdev->dev, "No CXL Features discovered\n"); - cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlds); + cxlmd = devm_cxl_add_memdev(cxlds, NULL); if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); @@ -1052,9 +994,6 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) return rc; - if (cxl_pci_ras_unmask(pdev)) - dev_dbg(&pdev->dev, "No RAS reporting unmasked\n"); - pci_save_state(pdev); return rc; diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index 51c8f2f84717..ada51948d52f 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright(c) 2022 Intel Corporation. All rights reserved. */ +#include <linux/aer.h> #include <linux/device.h> #include <linux/module.h> #include <linux/slab.h> @@ -68,9 +69,59 @@ static int cxl_switch_port_probe(struct cxl_port *port) return 0; } +static int cxl_ras_unmask(struct cxl_port *port) +{ + struct pci_dev *pdev; + void __iomem *addr; + u32 orig_val, val, mask; + u16 cap; + int rc; + + if (!dev_is_pci(port->uport_dev)) + return 0; + pdev = to_pci_dev(port->uport_dev); + + if (!port->regs.ras) { + pci_dbg(pdev, "No RAS registers.\n"); + return 0; + } + + /* BIOS has PCIe AER error control */ + if (!pcie_aer_is_native(pdev)) + return 0; + + rc = pcie_capability_read_word(pdev, PCI_EXP_DEVCTL, &cap); + if (rc) + return rc; + + if (cap & PCI_EXP_DEVCTL_URRE) { + addr = port->regs.ras + CXL_RAS_UNCORRECTABLE_MASK_OFFSET; + orig_val = readl(addr); + + mask = CXL_RAS_UNCORRECTABLE_MASK_MASK | + CXL_RAS_UNCORRECTABLE_MASK_F256B_MASK; + val = orig_val & ~mask; + writel(val, addr); + pci_dbg(pdev, "Uncorrectable RAS Errors Mask: %#x -> %#x\n", + orig_val, val); + } + + if (cap & PCI_EXP_DEVCTL_CERE) { + addr = port->regs.ras + CXL_RAS_CORRECTABLE_MASK_OFFSET; + orig_val = readl(addr); + val = orig_val & ~CXL_RAS_CORRECTABLE_MASK_MASK; + writel(val, addr); + pci_dbg(pdev, "Correctable RAS Errors Mask: %#x -> %#x\n", + orig_val, val); + } + + return 0; +} + static int cxl_endpoint_port_probe(struct cxl_port *port) { struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev); + struct cxl_dport *dport = port->parent_dport; int rc; /* Cache the data early to ensure is_visible() works */ @@ -87,6 +138,21 @@ static int cxl_endpoint_port_probe(struct cxl_port *port) return rc; /* + * With VH (CXL Virtual Host) topology the cxl_port::add_dport() method + * handles RAS setup for downstream ports. With RCH (CXL Restricted CXL + * Host) topologies the downstream port is enumerated early by platform + * firmware, but the RCRB (root complex register block) is not mapped + * until after the cxl_pci driver attaches to the RCIeP (root complex + * integrated endpoint). + */ + if (dport->rch) + devm_cxl_dport_rch_ras_setup(dport); + + devm_cxl_port_ras_setup(port); + if (cxl_ras_unmask(port)) + dev_dbg(&port->dev, "failed to unmask RAS interrupts\n"); + + /* * Now that all endpoint decoders are successfully enumerated, try to * assemble regions from committed decoders */ @@ -151,15 +217,111 @@ static const struct attribute_group *cxl_port_attribute_groups[] = { NULL, }; +/* note this implicitly casts the group back to its @port */ +DEFINE_FREE(cxl_port_release_dr_group, struct cxl_port *, + if (_T) devres_release_group(&_T->dev, _T)) + +static struct cxl_dport *cxl_port_add_dport(struct cxl_port *port, + struct device *dport_dev) +{ + struct cxl_dport *dport; + int rc; + + /* Temp group for all "first dport" and "per dport" setup actions */ + void *port_dr_group __free(cxl_port_release_dr_group) = + devres_open_group(&port->dev, port, GFP_KERNEL); + if (!port_dr_group) + return ERR_PTR(-ENOMEM); + + if (port->nr_dports == 0) { + /* + * Some host bridges are known to not have component regsisters + * available until a root port has trained CXL. Perform that + * setup now. + */ + rc = cxl_port_setup_regs(port, port->component_reg_phys); + if (rc) + return ERR_PTR(rc); + + rc = devm_cxl_switch_port_decoders_setup(port); + if (rc) + return ERR_PTR(rc); + + /* + * RAS setup is optional, either driver operation can continue + * on failure, or the device does not implement RAS registers. + */ + devm_cxl_port_ras_setup(port); + } + + dport = devm_cxl_add_dport_by_dev(port, dport_dev); + if (IS_ERR(dport)) + return dport; + + /* This group was only needed for early exit above */ + devres_remove_group(&port->dev, no_free_ptr(port_dr_group)); + + cxl_switch_parse_cdat(dport); + + /* New dport added, update the decoder targets */ + cxl_port_update_decoder_targets(port, dport); + + dev_dbg(&port->dev, "dport%d:%s added\n", dport->port_id, + dev_name(dport_dev)); + + return dport; +} + static struct cxl_driver cxl_port_driver = { .name = "cxl_port", .probe = cxl_port_probe, + .add_dport = cxl_port_add_dport, .id = CXL_DEVICE_PORT, .drv = { + .probe_type = PROBE_FORCE_SYNCHRONOUS, .dev_groups = cxl_port_attribute_groups, }, }; +int devm_cxl_add_endpoint(struct device *host, struct cxl_memdev *cxlmd, + struct cxl_dport *parent_dport) +{ + struct cxl_port *parent_port = parent_dport->port; + struct cxl_port *endpoint, *iter, *down; + int rc; + + /* + * Now that the path to the root is established record all the + * intervening ports in the chain. + */ + for (iter = parent_port, down = NULL; !is_cxl_root(iter); + down = iter, iter = to_cxl_port(iter->dev.parent)) { + struct cxl_ep *ep; + + ep = cxl_ep_load(iter, cxlmd); + ep->next = down; + } + + /* Note: endpoint port component registers are derived from @cxlds */ + endpoint = devm_cxl_add_port(host, &cxlmd->dev, CXL_RESOURCE_NONE, + parent_dport); + if (IS_ERR(endpoint)) + return PTR_ERR(endpoint); + + rc = cxl_endpoint_autoremove(cxlmd, endpoint); + if (rc) + return rc; + + if (!endpoint->dev.driver) { + dev_err(&cxlmd->dev, "%s failed probe\n", + dev_name(&endpoint->dev)); + return -ENXIO; + } + + return 0; +} +EXPORT_SYMBOL_FOR_MODULES(devm_cxl_add_endpoint, "cxl_mem"); + static int __init cxl_port_init(void) { return cxl_driver_register(&cxl_port_driver); |
