diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-07-04 11:55:13 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-07-04 11:55:13 -0700 |
commit | 0c66a95c7e014abc3489e69dd3972d9225027d49 (patch) | |
tree | 1037aa65e78b4cad8f51ac7d48a0fb03a81d2837 /drivers | |
parent | 855ff900b8605df2b9ea309534cd2f02dc0c4cb8 (diff) | |
parent | 4ad6181e4b216ed0cb52f45d3c6d2c70c8ae9243 (diff) | |
download | lwn-0c66a95c7e014abc3489e69dd3972d9225027d49.tar.gz lwn-0c66a95c7e014abc3489e69dd3972d9225027d49.zip |
Merge tag 'cxl-for-5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl
Pull CXL (Compute Express Link) updates from Dan Williams:
"This subsystem is still in the build-out phase as the bulk of the
update is improvements to enumeration and fleshing out the device
model. In terms of new features, more mailbox commands have been added
to the allowed-list in support of persistent memory provisioning
support targeting v5.15.
The critical update from an enumeration perspective is support for the
CXL Fixed Memory Window Structure that indicates to Linux which system
physical address ranges decode to the CXL Host Bridges in the system.
This allows the driver to detect which address ranges have been mapped
by firmware and what address ranges are available for future hotplug.
So, again, mostly skeleton this round, with more meat targeting v5.15.
Summary:
- Add support for the CXL Fixed Memory Window Structure, a recent
extension of the ACPI CEDT (CXL Early Discovery Table)
- Add infrastructure for component registers
- Add HDM (Host-managed device memory) decoder definitions
- Define a device model for an HDM decoder tree
- Bridge CXL persistent memory capabilities to an NVDIMM bus /
device-model
- Switch to fine grained mapping of CXL MMIO registers to allow
different drivers / system software to own individual register
blocks
- Enable media provisioning commands, and publish the label storage
area size in sysfs
- Miscellaneous cleanups and fixes"
* tag 'cxl-for-5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl: (34 commits)
cxl/pci: Rename CXL REGLOC ID
cxl/acpi: Use the ACPI CFMWS to create static decoder objects
cxl/acpi: Add the Host Bridge base address to CXL port objects
cxl/pmem: Register 'pmem' / cxl_nvdimm devices
libnvdimm: Drop unused device power management support
libnvdimm: Export nvdimm shutdown helper, nvdimm_delete()
cxl/pmem: Add initial infrastructure for pmem support
cxl/core: Add cxl-bus driver infrastructure
cxl/pci: Add media provisioning required commands
cxl/component_regs: Fix offset
cxl/hdm: Fix decoder count calculation
cxl/acpi: Introduce cxl_decoder objects
cxl/acpi: Enumerate host bridge root ports
cxl/acpi: Add downstream port data to cxl_port instances
cxl/Kconfig: Default drivers to CONFIG_CXL_BUS
cxl/acpi: Introduce the root of a cxl_port topology
cxl/pci: Fixup devm_cxl_iomap_block() to take a 'struct device *'
cxl/pci: Add HDM decoder capabilities
cxl/pci: Reserve individual register block regions
cxl/pci: Map registers based on capabilities
...
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/cxl/Kconfig | 43 | ||||
-rw-r--r-- | drivers/cxl/Makefile | 12 | ||||
-rw-r--r-- | drivers/cxl/acpi.c | 434 | ||||
-rw-r--r-- | drivers/cxl/bus.c | 29 | ||||
-rw-r--r-- | drivers/cxl/core.c | 1067 | ||||
-rw-r--r-- | drivers/cxl/cxl.h | 332 | ||||
-rw-r--r-- | drivers/cxl/mem.h | 81 | ||||
-rw-r--r-- | drivers/cxl/pci.c (renamed from drivers/cxl/mem.c) | 439 | ||||
-rw-r--r-- | drivers/cxl/pci.h | 2 | ||||
-rw-r--r-- | drivers/cxl/pmem.c | 230 | ||||
-rw-r--r-- | drivers/nvdimm/bus.c | 64 | ||||
-rw-r--r-- | drivers/nvdimm/dimm_devs.c | 18 |
12 files changed, 2467 insertions, 284 deletions
diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig index 97dc4d751651..e6de221cc568 100644 --- a/drivers/cxl/Kconfig +++ b/drivers/cxl/Kconfig @@ -15,21 +15,17 @@ if CXL_BUS config CXL_MEM tristate "CXL.mem: Memory Devices" + default CXL_BUS help The CXL.mem protocol allows a device to act as a provider of "System RAM" and/or "Persistent Memory" that is fully coherent as if the memory was attached to the typical CPU memory controller. - Say 'y/m' to enable a driver (named "cxl_mem.ko" when built as - a module) that will attach to CXL.mem devices for - configuration, provisioning, and health monitoring. This - driver is required for dynamic provisioning of CXL.mem - attached memory which is a prerequisite for persistent memory - support. Typically volatile memory is mapped by platform - firmware and included in the platform memory map, but in some - cases the OS is responsible for mapping that memory. See - Chapter 2.3 Type 3 CXL Device in the CXL 2.0 specification. + Say 'y/m' to enable a driver that will attach to CXL.mem devices for + configuration and management primarily via the mailbox interface. See + Chapter 2.3 Type 3 CXL Device in the CXL 2.0 specification for more + details. If unsure say 'm'. @@ -50,4 +46,33 @@ config CXL_MEM_RAW_COMMANDS potential impact to memory currently in use by the kernel. If developing CXL hardware or the driver say Y, otherwise say N. + +config CXL_ACPI + tristate "CXL ACPI: Platform Support" + depends on ACPI + default CXL_BUS + help + Enable support for host managed device memory (HDM) resources + published by a platform's ACPI CXL memory layout description. See + Chapter 9.14.1 CXL Early Discovery Table (CEDT) in the CXL 2.0 + specification, and CXL Fixed Memory Window Structures (CEDT.CFMWS) + (https://www.computeexpresslink.org/spec-landing). The CXL core + consumes these resource to publish the root of a cxl_port decode + hierarchy to map regions that represent System RAM, or Persistent + Memory regions to be managed by LIBNVDIMM. + + If unsure say 'm'. + +config CXL_PMEM + tristate "CXL PMEM: Persistent Memory Support" + depends on LIBNVDIMM + default CXL_BUS + help + In addition to typical memory resources a platform may also advertise + support for persistent memory attached via CXL. This support is + managed via a bridge driver from CXL to the LIBNVDIMM system + subsystem. Say 'y/m' to enable support for enumerating and + provisioning the persistent memory capacity of CXL memory expanders. + + If unsure say 'm'. endif diff --git a/drivers/cxl/Makefile b/drivers/cxl/Makefile index a314a1891f4d..32954059b37b 100644 --- a/drivers/cxl/Makefile +++ b/drivers/cxl/Makefile @@ -1,7 +1,11 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_CXL_BUS) += cxl_bus.o -obj-$(CONFIG_CXL_MEM) += cxl_mem.o +obj-$(CONFIG_CXL_BUS) += cxl_core.o +obj-$(CONFIG_CXL_MEM) += cxl_pci.o +obj-$(CONFIG_CXL_ACPI) += cxl_acpi.o +obj-$(CONFIG_CXL_PMEM) += cxl_pmem.o ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=CXL -cxl_bus-y := bus.o -cxl_mem-y := mem.o +cxl_core-y := core.o +cxl_pci-y := pci.o +cxl_acpi-y := acpi.o +cxl_pmem-y := pmem.o diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c new file mode 100644 index 000000000000..8ae89273f58e --- /dev/null +++ b/drivers/cxl/acpi.c @@ -0,0 +1,434 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2021 Intel Corporation. All rights reserved. */ +#include <linux/platform_device.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/kernel.h> +#include <linux/acpi.h> +#include <linux/pci.h> +#include "cxl.h" + +static struct acpi_table_header *acpi_cedt; + +/* Encode defined in CXL 2.0 8.2.5.12.7 HDM Decoder Control Register */ +#define CFMWS_INTERLEAVE_WAYS(x) (1 << (x)->interleave_ways) +#define CFMWS_INTERLEAVE_GRANULARITY(x) ((x)->granularity + 8) + +static unsigned long cfmws_to_decoder_flags(int restrictions) +{ + unsigned long flags = 0; + + if (restrictions & ACPI_CEDT_CFMWS_RESTRICT_TYPE2) + flags |= CXL_DECODER_F_TYPE2; + if (restrictions & ACPI_CEDT_CFMWS_RESTRICT_TYPE3) + flags |= CXL_DECODER_F_TYPE3; + if (restrictions & ACPI_CEDT_CFMWS_RESTRICT_VOLATILE) + flags |= CXL_DECODER_F_RAM; + if (restrictions & ACPI_CEDT_CFMWS_RESTRICT_PMEM) + flags |= CXL_DECODER_F_PMEM; + if (restrictions & ACPI_CEDT_CFMWS_RESTRICT_FIXED) + flags |= CXL_DECODER_F_LOCK; + + return flags; +} + +static int cxl_acpi_cfmws_verify(struct device *dev, + struct acpi_cedt_cfmws *cfmws) +{ + int expected_len; + + if (cfmws->interleave_arithmetic != ACPI_CEDT_CFMWS_ARITHMETIC_MODULO) { + dev_err(dev, "CFMWS Unsupported Interleave Arithmetic\n"); + return -EINVAL; + } + + if (!IS_ALIGNED(cfmws->base_hpa, SZ_256M)) { + dev_err(dev, "CFMWS Base HPA not 256MB aligned\n"); + return -EINVAL; + } + + if (!IS_ALIGNED(cfmws->window_size, SZ_256M)) { + dev_err(dev, "CFMWS Window Size not 256MB aligned\n"); + return -EINVAL; + } + + expected_len = struct_size((cfmws), interleave_targets, + CFMWS_INTERLEAVE_WAYS(cfmws)); + + if (cfmws->header.length < expected_len) { + dev_err(dev, "CFMWS length %d less than expected %d\n", + cfmws->header.length, expected_len); + return -EINVAL; + } + + if (cfmws->header.length > expected_len) + dev_dbg(dev, "CFMWS length %d greater than expected %d\n", + cfmws->header.length, expected_len); + + return 0; +} + +static void cxl_add_cfmws_decoders(struct device *dev, + struct cxl_port *root_port) +{ + struct acpi_cedt_cfmws *cfmws; + struct cxl_decoder *cxld; + acpi_size len, cur = 0; + void *cedt_subtable; + unsigned long flags; + int rc; + + len = acpi_cedt->length - sizeof(*acpi_cedt); + cedt_subtable = acpi_cedt + 1; + + while (cur < len) { + struct acpi_cedt_header *c = cedt_subtable + cur; + + if (c->type != ACPI_CEDT_TYPE_CFMWS) { + cur += c->length; + continue; + } + + cfmws = cedt_subtable + cur; + + if (cfmws->header.length < sizeof(*cfmws)) { + dev_warn_once(dev, + "CFMWS entry skipped:invalid length:%u\n", + cfmws->header.length); + cur += c->length; + continue; + } + + rc = cxl_acpi_cfmws_verify(dev, cfmws); + if (rc) { + dev_err(dev, "CFMWS range %#llx-%#llx not registered\n", + cfmws->base_hpa, cfmws->base_hpa + + cfmws->window_size - 1); + cur += c->length; + continue; + } + + flags = cfmws_to_decoder_flags(cfmws->restrictions); + cxld = devm_cxl_add_decoder(dev, root_port, + CFMWS_INTERLEAVE_WAYS(cfmws), + cfmws->base_hpa, cfmws->window_size, + CFMWS_INTERLEAVE_WAYS(cfmws), + CFMWS_INTERLEAVE_GRANULARITY(cfmws), + CXL_DECODER_EXPANDER, + flags); + + if (IS_ERR(cxld)) { + dev_err(dev, "Failed to add decoder for %#llx-%#llx\n", + cfmws->base_hpa, cfmws->base_hpa + + cfmws->window_size - 1); + } else { + dev_dbg(dev, "add: %s range %#llx-%#llx\n", + dev_name(&cxld->dev), cfmws->base_hpa, + cfmws->base_hpa + cfmws->window_size - 1); + } + cur += c->length; + } +} + +static struct acpi_cedt_chbs *cxl_acpi_match_chbs(struct device *dev, u32 uid) +{ + struct acpi_cedt_chbs *chbs, *chbs_match = NULL; + acpi_size len, cur = 0; + void *cedt_subtable; + + len = acpi_cedt->length - sizeof(*acpi_cedt); + cedt_subtable = acpi_cedt + 1; + + while (cur < len) { + struct acpi_cedt_header *c = cedt_subtable + cur; + + if (c->type != ACPI_CEDT_TYPE_CHBS) { + cur += c->length; + continue; + } + + chbs = cedt_subtable + cur; + + if (chbs->header.length < sizeof(*chbs)) { + dev_warn_once(dev, + "CHBS entry skipped: invalid length:%u\n", + chbs->header.length); + cur += c->length; + continue; + } + + if (chbs->uid != uid) { + cur += c->length; + continue; + } + + if (chbs_match) { + dev_warn_once(dev, + "CHBS entry skipped: duplicate UID:%u\n", + uid); + cur += c->length; + continue; + } + + chbs_match = chbs; + cur += c->length; + } + + return chbs_match ? chbs_match : ERR_PTR(-ENODEV); +} + +static resource_size_t get_chbcr(struct acpi_cedt_chbs *chbs) +{ + return IS_ERR(chbs) ? CXL_RESOURCE_NONE : chbs->base; +} + +struct cxl_walk_context { + struct device *dev; + struct pci_bus *root; + struct cxl_port *port; + int error; + int count; +}; + +static int match_add_root_ports(struct pci_dev *pdev, void *data) +{ + struct cxl_walk_context *ctx = data; + struct pci_bus *root_bus = ctx->root; + struct cxl_port *port = ctx->port; + int type = pci_pcie_type(pdev); + struct device *dev = ctx->dev; + u32 lnkcap, port_num; + int rc; + + if (pdev->bus != root_bus) + return 0; + if (!pci_is_pcie(pdev)) + return 0; + if (type != PCI_EXP_TYPE_ROOT_PORT) + return 0; + if (pci_read_config_dword(pdev, pci_pcie_cap(pdev) + PCI_EXP_LNKCAP, + &lnkcap) != PCIBIOS_SUCCESSFUL) + return 0; + + /* TODO walk DVSEC to find component register base */ + port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap); + rc = cxl_add_dport(port, &pdev->dev, port_num, CXL_RESOURCE_NONE); + if (rc) { + ctx->error = rc; + return rc; + } + ctx->count++; + + dev_dbg(dev, "add dport%d: %s\n", port_num, dev_name(&pdev->dev)); + + return 0; +} + +static struct cxl_dport *find_dport_by_dev(struct cxl_port *port, struct device *dev) +{ + struct cxl_dport *dport; + + device_lock(&port->dev); + list_for_each_entry(dport, &port->dports, list) + if (dport->dport == dev) { + device_unlock(&port->dev); + return dport; + } + + device_unlock(&port->dev); + return NULL; +} + +static struct acpi_device *to_cxl_host_bridge(struct device *dev) +{ + struct acpi_device *adev = to_acpi_device(dev); + + if (strcmp(acpi_device_hid(adev), "ACPI0016") == 0) + return adev; + return NULL; +} + +/* + * A host bridge is a dport to a CFMWS decode and it is a uport to the + * dport (PCIe Root Ports) in the host bridge. + */ +static int add_host_bridge_uport(struct device *match, void *arg) +{ + struct acpi_device *bridge = to_cxl_host_bridge(match); + struct cxl_port *root_port = arg; + struct device *host = root_port->dev.parent; + struct acpi_pci_root *pci_root; + struct cxl_walk_context ctx; + struct cxl_decoder *cxld; + struct cxl_dport *dport; + struct cxl_port *port; + + if (!bridge) + return 0; + + pci_root = acpi_pci_find_root(bridge->handle); + if (!pci_root) + return -ENXIO; + + dport = find_dport_by_dev(root_port, match); + if (!dport) { + dev_dbg(host, "host bridge expected and not found\n"); + return -ENODEV; + } + + port = devm_cxl_add_port(host, match, dport->component_reg_phys, + root_port); + if (IS_ERR(port)) + return PTR_ERR(port); + dev_dbg(host, "%s: add: %s\n", dev_name(match), dev_name(&port->dev)); + + ctx = (struct cxl_walk_context){ + .dev = host, + .root = pci_root->bus, + .port = port, + }; + pci_walk_bus(pci_root->bus, match_add_root_ports, &ctx); + + if (ctx.count == 0) + return -ENODEV; + if (ctx.error) + return ctx.error; + + /* TODO: Scan CHBCR for HDM Decoder resources */ + + /* + * In the single-port host-bridge case there are no HDM decoders + * in the CHBCR and a 1:1 passthrough decode is implied. + */ + if (ctx.count == 1) { + cxld = devm_cxl_add_passthrough_decoder(host, port); + if (IS_ERR(cxld)) + return PTR_ERR(cxld); + + dev_dbg(host, "add: %s\n", dev_name(&cxld->dev)); + } + + return 0; +} + +static int add_host_bridge_dport(struct device *match, void *arg) +{ + int rc; + acpi_status status; + unsigned long long uid; + struct acpi_cedt_chbs *chbs; + struct cxl_port *root_port = arg; + struct device *host = root_port->dev.parent; + struct acpi_device *bridge = to_cxl_host_bridge(match); + + if (!bridge) + return 0; + + status = acpi_evaluate_integer(bridge->handle, METHOD_NAME__UID, NULL, + &uid); + if (status != AE_OK) { + dev_err(host, "unable to retrieve _UID of %s\n", + dev_name(match)); + return -ENODEV; + } + + chbs = cxl_acpi_match_chbs(host, uid); + if (IS_ERR(chbs)) + dev_dbg(host, "No CHBS found for Host Bridge: %s\n", + dev_name(match)); + + rc = cxl_add_dport(root_port, match, uid, get_chbcr(chbs)); + if (rc) { + dev_err(host, "failed to add downstream port: %s\n", + dev_name(match)); + return rc; + } + dev_dbg(host, "add dport%llu: %s\n", uid, dev_name(match)); + return 0; +} + +static int add_root_nvdimm_bridge(struct device *match, void *data) +{ + struct cxl_decoder *cxld; + struct cxl_port *root_port = data; + struct cxl_nvdimm_bridge *cxl_nvb; + struct device *host = root_port->dev.parent; + + if (!is_root_decoder(match)) + return 0; + + cxld = to_cxl_decoder(match); + if (!(cxld->flags & CXL_DECODER_F_PMEM)) + return 0; + + cxl_nvb = devm_cxl_add_nvdimm_bridge(host, root_port); + if (IS_ERR(cxl_nvb)) { + dev_dbg(host, "failed to register pmem\n"); + return PTR_ERR(cxl_nvb); + } + dev_dbg(host, "%s: add: %s\n", dev_name(&root_port->dev), + dev_name(&cxl_nvb->dev)); + return 1; +} + +static int cxl_acpi_probe(struct platform_device *pdev) +{ + int rc; + acpi_status status; + struct cxl_port *root_port; + struct device *host = &pdev->dev; + struct acpi_device *adev = ACPI_COMPANION(host); + + root_port = devm_cxl_add_port(host, host, CXL_RESOURCE_NONE, NULL); + if (IS_ERR(root_port)) + return PTR_ERR(root_port); + dev_dbg(host, "add: %s\n", dev_name(&root_port->dev)); + + status = acpi_get_table(ACPI_SIG_CEDT, 0, &acpi_cedt); + if (ACPI_FAILURE(status)) + return -ENXIO; + + rc = bus_for_each_dev(adev->dev.bus, NULL, root_port, + add_host_bridge_dport); + if (rc) + goto out; + + cxl_add_cfmws_decoders(host, root_port); + + /* + * Root level scanned with host-bridge as dports, now scan host-bridges + * for their role as CXL uports to their CXL-capable PCIe Root Ports. + */ + rc = bus_for_each_dev(adev->dev.bus, NULL, root_port, + add_host_bridge_uport); + if (rc) + goto out; + + if (IS_ENABLED(CONFIG_CXL_PMEM)) + rc = device_for_each_child(&root_port->dev, root_port, + add_root_nvdimm_bridge); + +out: + acpi_put_table(acpi_cedt); + if (rc < 0) + return rc; + return 0; +} + +static const struct acpi_device_id cxl_acpi_ids[] = { + { "ACPI0017", 0 }, + { "", 0 }, +}; +MODULE_DEVICE_TABLE(acpi, cxl_acpi_ids); + +static struct platform_driver cxl_acpi_driver = { + .probe = cxl_acpi_probe, + .driver = { + .name = KBUILD_MODNAME, + .acpi_match_table = cxl_acpi_ids, + }, +}; + +module_platform_driver(cxl_acpi_driver); +MODULE_LICENSE("GPL v2"); +MODULE_IMPORT_NS(CXL); diff --git a/drivers/cxl/bus.c b/drivers/cxl/bus.c deleted file mode 100644 index 58f74796d525..000000000000 --- a/drivers/cxl/bus.c +++ /dev/null @@ -1,29 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* Copyright(c) 2020 Intel Corporation. All rights reserved. */ -#include <linux/device.h> -#include <linux/module.h> - -/** - * DOC: cxl bus - * - * The CXL bus provides namespace for control devices and a rendezvous - * point for cross-device interleave coordination. - */ -struct bus_type cxl_bus_type = { - .name = "cxl", -}; -EXPORT_SYMBOL_GPL(cxl_bus_type); - -static __init int cxl_bus_init(void) -{ - return bus_register(&cxl_bus_type); -} - -static void cxl_bus_exit(void) -{ - bus_unregister(&cxl_bus_type); -} - -module_init(cxl_bus_init); -module_exit(cxl_bus_exit); -MODULE_LICENSE("GPL v2"); diff --git a/drivers/cxl/core.c b/drivers/cxl/core.c new file mode 100644 index 000000000000..a2e4d54fc7bc --- /dev/null +++ b/drivers/cxl/core.c @@ -0,0 +1,1067 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2020 Intel Corporation. All rights reserved. */ +#include <linux/io-64-nonatomic-lo-hi.h> +#include <linux/device.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/slab.h> +#include <linux/idr.h> +#include "cxl.h" +#include "mem.h" + +/** + * DOC: cxl core + * + * The CXL core provides a sysfs hierarchy for control devices and a rendezvous + * point for cross-device interleave coordination through cxl ports. + */ + +static DEFINE_IDA(cxl_port_ida); + +static ssize_t devtype_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sysfs_emit(buf, "%s\n", dev->type->name); +} +static DEVICE_ATTR_RO(devtype); + +static struct attribute *cxl_base_attributes[] = { + &dev_attr_devtype.attr, + NULL, +}; + +static struct attribute_group cxl_base_attribute_group = { + .attrs = cxl_base_attributes, +}; + +static ssize_t start_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct cxl_decoder *cxld = to_cxl_decoder(dev); + + return sysfs_emit(buf, "%#llx\n", cxld->range.start); +} +static DEVICE_ATTR_RO(start); + +static ssize_t size_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct cxl_decoder *cxld = to_cxl_decoder(dev); + + return sysfs_emit(buf, "%#llx\n", range_len(&cxld->range)); +} +static DEVICE_ATTR_RO(size); + +#define CXL_DECODER_FLAG_ATTR(name, flag) \ +static ssize_t name##_show(struct device *dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + struct cxl_decoder *cxld = to_cxl_decoder(dev); \ + \ + return sysfs_emit(buf, "%s\n", \ + (cxld->flags & (flag)) ? "1" : "0"); \ +} \ +static DEVICE_ATTR_RO(name) + +CXL_DECODER_FLAG_ATTR(cap_pmem, CXL_DECODER_F_PMEM); +CXL_DECODER_FLAG_ATTR(cap_ram, CXL_DECODER_F_RAM); +CXL_DECODER_FLAG_ATTR(cap_type2, CXL_DECODER_F_TYPE2); +CXL_DECODER_FLAG_ATTR(cap_type3, CXL_DECODER_F_TYPE3); +CXL_DECODER_FLAG_ATTR(locked, CXL_DECODER_F_LOCK); + +static ssize_t target_type_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cxl_decoder *cxld = to_cxl_decoder(dev); + + switch (cxld->target_type) { + case CXL_DECODER_ACCELERATOR: + return sysfs_emit(buf, "accelerator\n"); + case CXL_DECODER_EXPANDER: + return sysfs_emit(buf, "expander\n"); + } + return -ENXIO; +} +static DEVICE_ATTR_RO(target_type); + +static ssize_t target_list_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cxl_decoder *cxld = to_cxl_decoder(dev); + ssize_t offset = 0; + int i, rc = 0; + + device_lock(dev); + for (i = 0; i < cxld->interleave_ways; i++) { + struct cxl_dport *dport = cxld->target[i]; + struct cxl_dport *next = NULL; + + if (!dport) + break; + + if (i + 1 < cxld->interleave_ways) + next = cxld->target[i + 1]; + rc = sysfs_emit_at(buf, offset, "%d%s", dport->port_id, + next ? "," : ""); + if (rc < 0) + break; + offset += rc; + } + device_unlock(dev); + + if (rc < 0) + return rc; + + rc = sysfs_emit_at(buf, offset, "\n"); + if (rc < 0) + return rc; + + return offset + rc; +} +static DEVICE_ATTR_RO(target_list); + +static struct attribute *cxl_decoder_base_attrs[] = { + &dev_attr_start.attr, + &dev_attr_size.attr, + &dev_attr_locked.attr, + &dev_attr_target_list.attr, + NULL, +}; + +static struct attribute_group cxl_decoder_base_attribute_group = { + .attrs = cxl_decoder_base_attrs, +}; + +static struct attribute *cxl_decoder_root_attrs[] = { + &dev_attr_cap_pmem.attr, + &dev_attr_cap_ram.attr, + &dev_attr_cap_type2.attr, + &dev_attr_cap_type3.attr, + NULL, +}; + +static struct attribute_group cxl_decoder_root_attribute_group = { + .attrs = cxl_decoder_root_attrs, +}; + +static const struct attribute_group *cxl_decoder_root_attribute_groups[] = { + &cxl_decoder_root_attribute_group, + &cxl_decoder_base_attribute_group, + &cxl_base_attribute_group, + NULL, +}; + +static struct attribute *cxl_decoder_switch_attrs[] = { + &dev_attr_target_type.attr, + NULL, +}; + +static struct attribute_group cxl_decoder_switch_attribute_group = { + .attrs = cxl_decoder_switch_attrs, +}; + +static const struct attribute_group *cxl_decoder_switch_attribute_groups[] = { + &cxl_decoder_switch_attribute_group, + &cxl_decoder_base_attribute_group, + &cxl_base_attribute_group, + NULL, +}; + +static void cxl_decoder_release(struct device *dev) +{ + struct cxl_decoder *cxld = to_cxl_decoder(dev); + struct cxl_port *port = to_cxl_port(dev->parent); + + ida_free(&port->decoder_ida, cxld->id); + kfree(cxld); +} + +static const struct device_type cxl_decoder_switch_type = { + .name = "cxl_decoder_switch", + .release = cxl_decoder_release, + .groups = cxl_decoder_switch_attribute_groups, +}; + +static const struct device_type cxl_decoder_root_type = { + .name = "cxl_decoder_root", + .release = cxl_decoder_release, + .groups = cxl_decoder_root_attribute_groups, +}; + +bool is_root_decoder(struct device *dev) +{ + return dev->type == &cxl_decoder_root_type; +} +EXPORT_SYMBOL_GPL(is_root_decoder); + +struct cxl_decoder *to_cxl_decoder(struct device *dev) +{ + if (dev_WARN_ONCE(dev, dev->type->release != cxl_decoder_release, + "not a cxl_decoder device\n")) + return NULL; + return container_of(dev, struct cxl_decoder, dev); +} +EXPORT_SYMBOL_GPL(to_cxl_decoder); + +static void cxl_dport_release(struct cxl_dport *dport) +{ + list_del(&dport->list); + put_device(dport->dport); + kfree(dport); +} + +static void cxl_port_release(struct device *dev) +{ + struct cxl_port *port = to_cxl_port(dev); + struct cxl_dport *dport, *_d; + + device_lock(dev); + list_for_each_entry_safe(dport, _d, &port->dports, list) + cxl_dport_release(dport); + device_unlock(dev); + ida_free(&cxl_port_ida, port->id); + kfree(port); +} + +static const struct attribute_group *cxl_port_attribute_groups[] = { + &cxl_base_attribute_group, + NULL, +}; + +static const struct device_type cxl_port_type = { + .name = "cxl_port", + .release = cxl_port_release, + .groups = cxl_port_attribute_groups, +}; + +struct cxl_port *to_cxl_port(struct device *dev) +{ + if (dev_WARN_ONCE(dev, dev->type != &cxl_port_type, + "not a cxl_port device\n")) + return NULL; + return container_of(dev, struct cxl_port, dev); +} + +static void unregister_port(void *_port) +{ + struct cxl_port *port = _port; + struct cxl_dport *dport; + + device_lock(&port->dev); + list_for_each_entry(dport, &port->dports, list) { + char link_name[CXL_TARGET_STRLEN]; + + if (snprintf(link_name, CXL_TARGET_STRLEN, "dport%d", + dport->port_id) >= CXL_TARGET_STRLEN) + continue; + sysfs_remove_link(&port->dev.kobj, link_name); + } + device_unlock(&port->dev); + device_unregister(&port->dev); +} + +static void cxl_unlink_uport(void *_port) +{ + struct cxl_port *port = _port; + + sysfs_remove_link(&port->dev.kobj, "uport"); +} + +static int devm_cxl_link_uport(struct device *host, struct cxl_port *port) +{ + int rc; + + rc = sysfs_create_link(&port->dev.kobj, &port->uport->kobj, "uport"); + if (rc) + return rc; + return devm_add_action_or_reset(host, cxl_unlink_uport, port); +} + +static struct cxl_port *cxl_port_alloc(struct device *uport, + resource_size_t component_reg_phys, + struct cxl_port *parent_port) +{ + struct cxl_port *port; + struct device *dev; + int rc; + + port = kzalloc(sizeof(*port), GFP_KERNEL); + if (!port) + return ERR_PTR(-ENOMEM); + + rc = ida_alloc(&cxl_port_ida, GFP_KERNEL); + if (rc < 0) + goto err; + port->id = rc; + + /* + * The top-level cxl_port "cxl_root" does not have a cxl_port as + * its parent and it does not have any corresponding component + * registers as its decode is described by a fixed platform + * description. + */ + dev = &port->dev; + if (parent_port) + dev->parent = &parent_port->dev; + else + dev->parent = uport; + + port->uport = uport; + port->component_reg_phys = component_reg_phys; + ida_init(&port->decoder_ida); + INIT_LIST_HEAD(&port->dports); + + device_initialize(dev); + device_set_pm_not_required(dev); + dev->bus = &cxl_bus_type; + dev->type = &cxl_port_type; + + return port; + +err: + kfree(port); + return ERR_PTR(rc); +} + +/** + * devm_cxl_add_port - register a cxl_port in CXL memory decode hierarchy + * @host: host device for devm operations + * @uport: "physical" device implementing this upstream port + * @component_reg_phys: (optional) for configurable cxl_port instances + * @parent_port: next hop up in the CXL memory decode hierarchy + */ +struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport, + resource_size_t component_reg_phys, + struct cxl_port *parent_port) +{ + struct cxl_port *port; + struct device *dev; + int rc; + + port = cxl_port_alloc(uport, component_reg_phys, parent_port); + if (IS_ERR(port)) + return port; + + dev = &port->dev; + if (parent_port) + rc = dev_set_name(dev, "port%d", port->id); + else + rc = dev_set_name(dev, "root%d", port->id); + if (rc) + goto err; + + rc = device_add(dev); + if (rc) + goto err; + + rc = devm_add_action_or_reset(host, unregister_port, port); + if (rc) + return ERR_PTR(rc); + + rc = devm_cxl_link_uport(host, port); + if (rc) + return ERR_PTR(rc); + + return port; + +err: + put_device(dev); + return ERR_PTR(rc); +} +EXPORT_SYMBOL_GPL(devm_cxl_add_port); + +static struct cxl_dport *find_dport(struct cxl_port *port, int id) +{ + struct cxl_dport *dport; + + device_lock_assert(&port->dev); + list_for_each_entry (dport, &port->dports, list) + if (dport->port_id == id) + return dport; + return NULL; +} + +static int add_dport(struct cxl_port *port, struct cxl_dport *new) +{ + struct cxl_dport *dup; + + device_lock(&port->dev); + dup = find_dport(port, new->port_id); + if (dup) + dev_err(&port->dev, + "unable to add dport%d-%s non-unique port id (%s)\n", + new->port_id, dev_name(new->dport), + dev_name(dup->dport)); + else + list_add_tail(&new->list, &port->dports); + device_unlock(&port->dev); + + return dup ? -EEXIST : 0; +} + +/** + * cxl_add_dport - append downstream port data to a cxl_port + * @port: the cxl_port that references this dport + * @dport_dev: firmware or PCI device representing the dport + * @port_id: identifier for this dport in a decoder's target list + * @component_reg_phys: optional location of CXL component registers + * + * Note that all allocations and links are undone by cxl_port deletion + * and release. + */ +int cxl_add_dport(struct cxl_port *port, struct device *dport_dev, int port_id, + resource_size_t component_reg_phys) +{ + char link_name[CXL_TARGET_STRLEN]; + struct cxl_dport *dport; + int rc; + + if (snprintf(link_name, CXL_TARGET_STRLEN, "dport%d", port_id) >= + CXL_TARGET_STRLEN) + return -EINVAL; + + dport = kzalloc(sizeof(*dport), GFP_KERNEL); + if (!dport) + return -ENOMEM; + + INIT_LIST_HEAD(&dport->list); + dport->dport = get_device(dport_dev); + dport->port_id = port_id; + dport->component_reg_phys = component_reg_phys; + dport->port = port; + + rc = add_dport(port, dport); + if (rc) + goto err; + + rc = sysfs_create_link(&port->dev.kobj, &dport_dev->kobj, link_name); + if (rc) + goto err; + + return 0; +err: + cxl_dport_release(dport); + return rc; +} +EXPORT_SYMBOL_GPL(cxl_add_dport); + +static struct cxl_decoder * +cxl_decoder_alloc(struct cxl_port *port, int nr_targets, resource_size_t base, + resource_size_t len, int interleave_ways, + int interleave_granularity, enum cxl_decoder_type type, + unsigned long flags) +{ + struct cxl_decoder *cxld; + struct device *dev; + int rc = 0; + + if (interleave_ways < 1) + return ERR_PTR(-EINVAL); + + device_lock(&port->dev); + if (list_empty(&port->dports)) + rc = -EINVAL; + device_unlock(&port->dev); + if (rc) + return ERR_PTR(rc); + + cxld = kzalloc(struct_size(cxld, target, nr_targets), GFP_KERNEL); + if (!cxld) + return ERR_PTR(-ENOMEM); + + rc = ida_alloc(&port->decoder_ida, GFP_KERNEL); + if (rc < 0) + goto err; + + *cxld = (struct cxl_decoder) { + .id = rc, + .range = { + .start = base, + .end = base + len - 1, + }, + .flags = flags, + .interleave_ways = interleave_ways, + .interleave_granularity = interleave_granularity, + .target_type = type, + }; + + /* handle implied target_list */ + if (interleave_ways == 1) + cxld->target[0] = + list_first_entry(&port->dports, struct cxl_dport, list); + dev = &cxld->dev; + device_initialize(dev); + device_set_pm_not_required(dev); + dev->parent = &port->dev; + dev->bus = &cxl_bus_type; + + /* root ports do not have a cxl_port_type parent */ + if (port->dev.parent->type == &cxl_port_type) + dev->type = &cxl_decoder_switch_type; + else + dev->type = &cxl_decoder_root_type; + + return cxld; +err: + kfree(cxld); + return ERR_PTR(rc); +} + +static void unregister_dev(void *dev) +{ + device_unregister(dev); +} + +struct cxl_decoder * +devm_cxl_add_decoder(struct device *host, struct cxl_port *port, int nr_targets, + resource_size_t base, resource_size_t len, + int interleave_ways, int interleave_granularity, + enum cxl_decoder_type type, unsigned long flags) +{ + struct cxl_decoder *cxld; + struct device *dev; + int rc; + + cxld = cxl_decoder_alloc(port, nr_targets, base, len, interleave_ways, + interleave_granularity, type, flags); + if (IS_ERR(cxld)) + return cxld; + + dev = &cxld->dev; + rc = dev_set_name(dev, "decoder%d.%d", port->id, cxld->id); + if (rc) + goto err; + + rc = device_add(dev); + if (rc) + goto err; + + rc = devm_add_action_or_reset(host, unregister_dev, dev); + if (rc) + return ERR_PTR(rc); + return cxld; + +err: + put_device(dev); + return ERR_PTR(rc); +} +EXPORT_SYMBOL_GPL(devm_cxl_add_decoder); + +/** + * cxl_probe_component_regs() - Detect CXL Component register blocks + * @dev: Host device of the @base mapping + * @base: Mapping containing the HDM Decoder Capability Header + * @map: Map object describing the register block information found + * + * See CXL 2.0 8.2.4 Component Register Layout and Definition + * See CXL 2.0 8.2.5.5 CXL Device Register Interface + * + * Probe for component register information and return it in map object. + */ +void cxl_probe_component_regs(struct device *dev, void __iomem *base, + struct cxl_component_reg_map *map) +{ + int cap, cap_count; + u64 cap_array; + + *map = (struct cxl_component_reg_map) { 0 }; + + /* + * CXL.cache and CXL.mem registers are at offset 0x1000 as defined in + * CXL 2.0 8.2.4 Table 141. + */ + base += CXL_CM_OFFSET; + + cap_array = readq(base + CXL_CM_CAP_HDR_OFFSET); + + if (FIELD_GET(CXL_CM_CAP_HDR_ID_MASK, cap_array) != CM_CAP_HDR_CAP_ID) { + dev_err(dev, + "Couldn't locate the CXL.cache and CXL.mem capability array header./n"); + return; + } + + /* It's assumed that future versions will be backward compatible */ + cap_count = FIELD_GET(CXL_CM_CAP_HDR_ARRAY_SIZE_MASK, cap_array); + + for (cap = 1; cap <= cap_count; cap++) { + void __iomem *register_block; + u32 hdr; + int decoder_cnt; + u16 cap_id, offset; + u32 length; + + hdr = readl(base + cap * 0x4); + + cap_id = FIELD_GET(CXL_CM_CAP_HDR_ID_MASK, hdr); + offset = FIELD_GET(CXL_CM_CAP_PTR_MASK, hdr); + register_block = base + offset; + + switch (cap_id) { + case CXL_CM_CAP_CAP_ID_HDM: + dev_dbg(dev, "found HDM decoder capability (0x%x)\n", + offset); + + hdr = readl(register_block); + + decoder_cnt = cxl_hdm_decoder_count(hdr); + length = 0x20 * decoder_cnt + 0x10; + + map->hdm_decoder.valid = true; + map->hdm_decoder.offset = CXL_CM_OFFSET + offset; + map->hdm_decoder.size = length; + break; + default: + dev_dbg(dev, "Unknown CM cap ID: %d (0x%x)\n", cap_id, + offset); + break; + } + } +} +EXPORT_SYMBOL_GPL(cxl_probe_component_regs); + +static void cxl_nvdimm_bridge_release(struct device *dev) +{ + struct cxl_nvdimm_bridge *cxl_nvb = to_cxl_nvdimm_bridge(dev); + + kfree(cxl_nvb); +} + +static const struct attribute_group *cxl_nvdimm_bridge_attribute_groups[] = { + &cxl_base_attribute_group, + NULL, +}; + +static const struct device_type cxl_nvdimm_bridge_type = { + .name = "cxl_nvdimm_bridge", + .release = cxl_nvdimm_bridge_release, + .groups = cxl_nvdimm_bridge_attribute_groups, +}; + +struct cxl_nvdimm_bridge *to_cxl_nvdimm_bridge(struct device *dev) +{ + if (dev_WARN_ONCE(dev, dev->type != &cxl_nvdimm_bridge_type, + "not a cxl_nvdimm_bridge device\n")) + return NULL; + return container_of(dev, struct cxl_nvdimm_bridge, dev); +} +EXPORT_SYMBOL_GPL(to_cxl_nvdimm_bridge); + +static struct cxl_nvdimm_bridge * +cxl_nvdimm_bridge_alloc(struct cxl_port *port) +{ + struct cxl_nvdimm_bridge *cxl_nvb; + struct device *dev; + + cxl_nvb = kzalloc(sizeof(*cxl_nvb), GFP_KERNEL); + if (!cxl_nvb) + return ERR_PTR(-ENOMEM); + + dev = &cxl_nvb->dev; + cxl_nvb->port = port; + cxl_nvb->state = CXL_NVB_NEW; + device_initialize(dev); + device_set_pm_not_required(dev); + dev->parent = &port->dev; + dev->bus = &cxl_bus_type; + dev->type = &cxl_nvdimm_bridge_type; + + return cxl_nvb; +} + +static void unregister_nvb(void *_cxl_nvb) +{ + struct cxl_nvdimm_bridge *cxl_nvb = _cxl_nvb; + bool flush; + + /* + * If the bridge was ever activated then there might be in-flight state + * work to flush. Once the state has been changed to 'dead' then no new + * work can be queued by user-triggered bind. + */ + device_lock(&cxl_nvb->dev); + flush = cxl_nvb->state != CXL_NVB_NEW; + cxl_nvb->state = CXL_NVB_DEAD; + device_unlock(&cxl_nvb->dev); + + /* + * Even though the device core will trigger device_release_driver() + * before the unregister, it does not know about the fact that + * cxl_nvdimm_bridge_driver defers ->remove() work. So, do the driver + * release not and flush it before tearing down the nvdimm device + * hierarchy. + */ + device_release_driver(&cxl_nvb->dev); + if (flush) + flush_work(&cxl_nvb->state_work); + device_unregister(&cxl_nvb->dev); +} + +struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host, + struct cxl_port *port) +{ + struct cxl_nvdimm_bridge *cxl_nvb; + struct device *dev; + int rc; + + if (!IS_ENABLED(CONFIG_CXL_PMEM)) + return ERR_PTR(-ENXIO); + + cxl_nvb = cxl_nvdimm_bridge_alloc(port); + if (IS_ERR(cxl_nvb)) + return cxl_nvb; + + dev = &cxl_nvb->dev; + rc = dev_set_name(dev, "nvdimm-bridge"); + if (rc) + goto err; + + rc = device_add(dev); + if (rc) + goto err; + + rc = devm_add_action_or_reset(host, unregister_nvb, cxl_nvb); + if (rc) + return ERR_PTR(rc); + + return cxl_nvb; + +err: + put_device(dev); + return ERR_PTR(rc); +} +EXPORT_SYMBOL_GPL(devm_cxl_add_nvdimm_bridge); + +static void cxl_nvdimm_release(struct device *dev) +{ + struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev); + + kfree(cxl_nvd); +} + +static const struct attribute_group *cxl_nvdimm_attribute_groups[] = { + &cxl_base_attribute_group, + NULL, +}; + +static const struct device_type cxl_nvdimm_type = { + .name = "cxl_nvdimm", + .release = cxl_nvdimm_release, + .groups = cxl_nvdimm_attribute_groups, +}; + +bool is_cxl_nvdimm(struct device *dev) +{ + return dev->type == &cxl_nvdimm_type; +} +EXPORT_SYMBOL_GPL(is_cxl_nvdimm); + +struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev) +{ + if (dev_WARN_ONCE(dev, !is_cxl_nvdimm(dev), + "not a cxl_nvdimm device\n")) + return NULL; + return container_of(dev, struct cxl_nvdimm, dev); +} +EXPORT_SYMBOL_GPL(to_cxl_nvdimm); + +static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_memdev *cxlmd) +{ + struct cxl_nvdimm *cxl_nvd; + struct device *dev; + + cxl_nvd = kzalloc(sizeof(*cxl_nvd), GFP_KERNEL); + if (!cxl_nvd) + return ERR_PTR(-ENOMEM); + + dev = &cxl_nvd->dev; + cxl_nvd->cxlmd = cxlmd; + device_initialize(dev); + device_set_pm_not_required(dev); + dev->parent = &cxlmd->dev; + dev->bus = &cxl_bus_type; + dev->type = &cxl_nvdimm_type; + + return cxl_nvd; +} + +int devm_cxl_add_nvdimm(struct device *host, struct cxl_memdev *cxlmd) +{ + struct cxl_nvdimm *cxl_nvd; + struct device *dev; + int rc; + + cxl_nvd = cxl_nvdimm_alloc(cxlmd); + if (IS_ERR(cxl_nvd)) + return PTR_ERR(cxl_nvd); + + dev = &cxl_nvd->dev; + rc = dev_set_name(dev, "pmem%d", cxlmd->id); + if (rc) + goto err; + + rc = device_add(dev); + if (rc) + goto err; + + dev_dbg(host, "%s: register %s\n", dev_name(dev->parent), + dev_name(dev)); + + return devm_add_action_or_reset(host, unregister_dev, dev); + +err: + put_device(dev); + return rc; +} +EXPORT_SYMBOL_GPL(devm_cxl_add_nvdimm); + +/** + * cxl_probe_device_regs() - Detect CXL Device register blocks + * @dev: Host device of the @base mapping + * @base: Mapping of CXL 2.0 8.2.8 CXL Device Register Interface + * @map: Map object describing the register block information found + * + * Probe for device register information and return it in map object. + */ +void cxl_probe_device_regs(struct device *dev, void __iomem *base, + struct cxl_device_reg_map *map) +{ + int cap, cap_count; + u64 cap_array; + + *map = (struct cxl_device_reg_map){ 0 }; + + cap_array = readq(base + CXLDEV_CAP_ARRAY_OFFSET); + if (FIELD_GET(CXLDEV_CAP_ARRAY_ID_MASK, cap_array) != + CXLDEV_CAP_ARRAY_CAP_ID) + return; + + cap_count = FIELD_GET(CXLDEV_CAP_ARRAY_COUNT_MASK, cap_array); + + for (cap = 1; cap <= cap_count; cap++) { + u32 offset, length; + u16 cap_id; + + cap_id = FIELD_GET(CXLDEV_CAP_HDR_CAP_ID_MASK, + readl(base + cap * 0x10)); + offset = readl(base + cap * 0x10 + 0x4); + length = readl(base + cap * 0x10 + 0x8); + + switch (cap_id) { + case CXLDEV_CAP_CAP_ID_DEVICE_STATUS: + dev_dbg(dev, "found Status capability (0x%x)\n", offset); + + map->status.valid = true; + map->status.offset = offset; + map->status.size = length; + break; + case CXLDEV_CAP_CAP_ID_PRIMARY_MAILBOX: + dev_dbg(dev, "found Mailbox capability (0x%x)\n", offset); + map->mbox.valid = true; + map->mbox.offset = offset; + map->mbox.size = length; + break; + case CXLDEV_CAP_CAP_ID_SECONDARY_MAILBOX: + dev_dbg(dev, "found Secondary Mailbox capability (0x%x)\n", offset); + break; + case CXLDEV_CAP_CAP_ID_MEMDEV: + dev_dbg(dev, "found Memory Device capability (0x%x)\n", offset); + map->memdev.valid = true; + map->memdev.offset = offset; + map->memdev.size = length; + break; + default: + if (cap_id >= 0x8000) + dev_dbg(dev, "Vendor cap ID: %#x offset: %#x\n", cap_id, offset); + else + dev_dbg(dev, "Unknown cap ID: %#x offset: %#x\n", cap_id, offset); + break; + } + } +} +EXPORT_SYMBOL_GPL(cxl_probe_device_regs); + +static void __iomem *devm_cxl_iomap_block(struct device *dev, + resource_size_t addr, + resource_size_t length) +{ + void __iomem *ret_val; + struct resource *res; + + res = devm_request_mem_region(dev, addr, length, dev_name(dev)); + if (!res) { + resource_size_t end = addr + length - 1; + + dev_err(dev, "Failed to request region %pa-%pa\n", &addr, &end); + return NULL; + } + + ret_val = devm_ioremap(dev, addr, length); + if (!ret_val) + dev_err(dev, "Failed to map region %pr\n", res); + + return ret_val; +} + +int cxl_map_component_regs(struct pci_dev *pdev, + struct cxl_component_regs *regs, + struct cxl_register_map *map) +{ + struct device *dev = &pdev->dev; + resource_size_t phys_addr; + resource_size_t length; + + phys_addr = pci_resource_start(pdev, map->barno); + phys_addr += map->block_offset; + + phys_addr += map->component_map.hdm_decoder.offset; + length = map->component_map.hdm_decoder.size; + regs->hdm_decoder = devm_cxl_iomap_block(dev, phys_addr, length); + if (!regs->hdm_decoder) + return -ENOMEM; + + return 0; +} +EXPORT_SYMBOL_GPL(cxl_map_component_regs); + +int cxl_map_device_regs(struct pci_dev *pdev, + struct cxl_device_regs *regs, + struct cxl_register_map *map) +{ + struct device *dev = &pdev->dev; + resource_size_t phys_addr; + + phys_addr = pci_resource_start(pdev, map->barno); + phys_addr += map->block_offset; + + if (map->device_map.status.valid) { + resource_size_t addr; + resource_size_t length; + + addr = phys_addr + map->device_map.status.offset; + length = map->device_map.status.size; + regs->status = devm_cxl_iomap_block(dev, addr, length); + if (!regs->status) + return -ENOMEM; + } + + if (map->device_map.mbox.valid) { + resource_size_t addr; + resource_size_t length; + + addr = phys_addr + map->device_map.mbox.offset; + length = map->device_map.mbox.size; + regs->mbox = devm_cxl_iomap_block(dev, addr, length); + if (!regs->mbox) + return -ENOMEM; + } + + if (map->device_map.memdev.valid) { + resource_size_t addr; + resource_size_t length; + + addr = phys_addr + map->device_map.memdev.offset; + length = map->device_map.memdev.size; + regs->memdev = devm_cxl_iomap_block(dev, addr, length); + if (!regs->memdev) + return -ENOMEM; + } + + return 0; +} +EXPORT_SYMBOL_GPL(cxl_map_device_regs); + +/** + * __cxl_driver_register - register a driver for the cxl bus + * @cxl_drv: cxl driver structure to attach + * @owner: owning module/driver + * @modname: KBUILD_MODNAME for parent driver + */ +int __cxl_driver_register(struct cxl_driver *cxl_drv, struct module *owner, + const char *modname) +{ + if (!cxl_drv->probe) { + pr_debug("%s ->probe() must be specified\n", modname); + return -EINVAL; + } + + if (!cxl_drv->name) { + pr_debug("%s ->name must be specified\n", modname); + return -EINVAL; + } + + if (!cxl_drv->id) { + pr_debug("%s ->id must be specified\n", modname); + return -EINVAL; + } + + cxl_drv->drv.bus = &cxl_bus_type; + cxl_drv->drv.owner = owner; + cxl_drv->drv.mod_name = modname; + cxl_drv->drv.name = cxl_drv->name; + + return driver_register(&cxl_drv->drv); +} +EXPORT_SYMBOL_GPL(__cxl_driver_register); + +void cxl_driver_unregister(struct cxl_driver *cxl_drv) +{ + driver_unregister(&cxl_drv->drv); +} +EXPORT_SYMBOL_GPL(cxl_driver_unregister); + +static int cxl_device_id(struct device *dev) +{ + if (dev->type == &cxl_nvdimm_bridge_type) + return CXL_DEVICE_NVDIMM_BRIDGE; + if (dev->type == &cxl_nvdimm_type) + return CXL_DEVICE_NVDIMM; + return 0; +} + +static int cxl_bus_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + return add_uevent_var(env, "MODALIAS=" CXL_MODALIAS_FMT, + cxl_device_id(dev)); +} + +static int cxl_bus_match(struct device *dev, struct device_driver *drv) +{ + return cxl_device_id(dev) == to_cxl_drv(drv)->id; +} + +static int cxl_bus_probe(struct device *dev) +{ + return to_cxl_drv(dev->driver)->probe(dev); +} + +static int cxl_bus_remove(struct device *dev) +{ + struct cxl_driver *cxl_drv = to_cxl_drv(dev->driver); + + if (cxl_drv->remove) + cxl_drv->remove(dev); + return 0; +} + +struct bus_type cxl_bus_type = { + .name = "cxl", + .uevent = cxl_bus_uevent, + .match = cxl_bus_match, + .probe = cxl_bus_probe, + .remove = cxl_bus_remove, +}; +EXPORT_SYMBOL_GPL(cxl_bus_type); + +static __init int cxl_core_init(void) +{ + return bus_register(&cxl_bus_type); +} + +static void cxl_core_exit(void) +{ + bus_unregister(&cxl_bus_type); +} + +module_init(cxl_core_init); +module_exit(cxl_core_exit); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 6f14838c2d25..b6bda39a59e3 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -4,10 +4,51 @@ #ifndef __CXL_H__ #define __CXL_H__ +#include <linux/libnvdimm.h> #include <linux/bitfield.h> #include <linux/bitops.h> #include <linux/io.h> +/** + * DOC: cxl objects + * + * The CXL core objects like ports, decoders, and regions are shared + * between the subsystem drivers cxl_acpi, cxl_pci, and core drivers + * (port-driver, region-driver, nvdimm object-drivers... etc). + */ + +/* CXL 2.0 8.2.5 CXL.cache and CXL.mem Registers*/ +#define CXL_CM_OFFSET 0x1000 +#define CXL_CM_CAP_HDR_OFFSET 0x0 +#define CXL_CM_CAP_HDR_ID_MASK GENMASK(15, 0) +#define CM_CAP_HDR_CAP_ID 1 +#define CXL_CM_CAP_HDR_VERSION_MASK GENMASK(19, 16) +#define CM_CAP_HDR_CAP_VERSION 1 +#define CXL_CM_CAP_HDR_CACHE_MEM_VERSION_MASK GENMASK(23, 20) +#define CM_CAP_HDR_CACHE_MEM_VERSION 1 +#define CXL_CM_CAP_HDR_ARRAY_SIZE_MASK GENMASK(31, 24) +#define CXL_CM_CAP_PTR_MASK GENMASK(31, 20) + +#define CXL_CM_CAP_CAP_ID_HDM 0x5 +#define CXL_CM_CAP_CAP_HDM_VERSION 1 + +/* HDM decoders CXL 2.0 8.2.5.12 CXL HDM Decoder Capability Structure */ +#define CXL_HDM_DECODER_CAP_OFFSET 0x0 +#define CXL_HDM_DECODER_COUNT_MASK GENMASK(3, 0) +#define CXL_HDM_DECODER_TARGET_COUNT_MASK GENMASK(7, 4) +#define CXL_HDM_DECODER0_BASE_LOW_OFFSET 0x10 +#define CXL_HDM_DECODER0_BASE_HIGH_OFFSET 0x14 +#define CXL_HDM_DECODER0_SIZE_LOW_OFFSET 0x18 +#define CXL_HDM_DECODER0_SIZE_HIGH_OFFSET 0x1c +#define CXL_HDM_DECODER0_CTRL_OFFSET 0x20 + +static inline int cxl_hdm_decoder_count(u32 cap_hdr) +{ + int val = FIELD_GET(CXL_HDM_DECODER_COUNT_MASK, cap_hdr); + + return val ? val * 2 : 1; +} + /* CXL 2.0 8.2.8.1 Device Capabilities Array Register */ #define CXLDEV_CAP_ARRAY_OFFSET 0x0 #define CXLDEV_CAP_ARRAY_CAP_ID 0 @@ -34,62 +75,253 @@ #define CXLDEV_MBOX_BG_CMD_STATUS_OFFSET 0x18 #define CXLDEV_MBOX_PAYLOAD_OFFSET 0x20 -/* CXL 2.0 8.2.8.5.1.1 Memory Device Status Register */ -#define CXLMDEV_STATUS_OFFSET 0x0 -#define CXLMDEV_DEV_FATAL BIT(0) -#define CXLMDEV_FW_HALT BIT(1) -#define CXLMDEV_STATUS_MEDIA_STATUS_MASK GENMASK(3, 2) -#define CXLMDEV_MS_NOT_READY 0 -#define CXLMDEV_MS_READY 1 -#define CXLMDEV_MS_ERROR 2 -#define CXLMDEV_MS_DISABLED 3 -#define CXLMDEV_READY(status) \ - (FIELD_GET(CXLMDEV_STATUS_MEDIA_STATUS_MASK, status) == \ - CXLMDEV_MS_READY) -#define CXLMDEV_MBOX_IF_READY BIT(4) -#define CXLMDEV_RESET_NEEDED_MASK GENMASK(7, 5) -#define CXLMDEV_RESET_NEEDED_NOT 0 -#define CXLMDEV_RESET_NEEDED_COLD 1 -#define CXLMDEV_RESET_NEEDED_WARM 2 -#define CXLMDEV_RESET_NEEDED_HOT 3 -#define CXLMDEV_RESET_NEEDED_CXL 4 -#define CXLMDEV_RESET_NEEDED(status) \ - (FIELD_GET(CXLMDEV_RESET_NEEDED_MASK, status) != \ - CXLMDEV_RESET_NEEDED_NOT) - -struct cxl_memdev; +#define CXL_COMPONENT_REGS() \ + void __iomem *hdm_decoder + +#define CXL_DEVICE_REGS() \ + void __iomem *status; \ + void __iomem *mbox; \ + void __iomem *memdev + +/* See note for 'struct cxl_regs' for the rationale of this organization */ +/* + * CXL_COMPONENT_REGS - Common set of CXL Component register block base pointers + * @hdm_decoder: CXL 2.0 8.2.5.12 CXL HDM Decoder Capability Structure + */ +struct cxl_component_regs { + CXL_COMPONENT_REGS(); +}; + +/* See note for 'struct cxl_regs' for the rationale of this organization */ +/* + * CXL_DEVICE_REGS - Common set of CXL Device register block base pointers + * @status: CXL 2.0 8.2.8.3 Device Status Registers + * @mbox: CXL 2.0 8.2.8.4 Mailbox Registers + * @memdev: CXL 2.0 8.2.8.5 Memory Device Registers + */ +struct cxl_device_regs { + CXL_DEVICE_REGS(); +}; + +/* + * Note, the anonymous union organization allows for per + * register-block-type helper routines, without requiring block-type + * agnostic code to include the prefix. + */ +struct cxl_regs { + union { + struct { + CXL_COMPONENT_REGS(); + }; + struct cxl_component_regs component; + }; + union { + struct { + CXL_DEVICE_REGS(); + }; + struct cxl_device_regs device_regs; + }; +}; + +struct cxl_reg_map { + bool valid; + unsigned long offset; + unsigned long size; +}; + +struct cxl_component_reg_map { + struct cxl_reg_map hdm_decoder; +}; + +struct cxl_device_reg_map { + struct cxl_reg_map status; + struct cxl_reg_map mbox; + struct cxl_reg_map memdev; +}; + +struct cxl_register_map { + struct list_head list; + u64 block_offset; + u8 reg_type; + u8 barno; + union { + struct cxl_component_reg_map component_map; + struct cxl_device_reg_map device_map; + }; +}; + +void cxl_probe_component_regs(struct device *dev, void __iomem *base, + struct cxl_component_reg_map *map); +void cxl_probe_device_regs(struct device *dev, void __iomem *base, + struct cxl_device_reg_map *map); +int cxl_map_component_regs(struct pci_dev *pdev, + struct cxl_component_regs *regs, + struct cxl_register_map *map); +int cxl_map_device_regs(struct pci_dev *pdev, + struct cxl_device_regs *regs, + struct cxl_register_map *map); + +#define CXL_RESOURCE_NONE ((resource_size_t) -1) +#define CXL_TARGET_STRLEN 20 + +/* + * cxl_decoder flags that define the type of memory / devices this + * decoder supports as well as configuration lock status See "CXL 2.0 + * 8.2.5.12.7 CXL HDM Decoder 0 Control Register" for details. + */ +#define CXL_DECODER_F_RAM BIT(0) +#define CXL_DECODER_F_PMEM BIT(1) +#define CXL_DECODER_F_TYPE2 BIT(2) +#define CXL_DECODER_F_TYPE3 BIT(3) +#define CXL_DECODER_F_LOCK BIT(4) +#define CXL_DECODER_F_MASK GENMASK(4, 0) + +enum cxl_decoder_type { + CXL_DECODER_ACCELERATOR = 2, + CXL_DECODER_EXPANDER = 3, +}; + /** - * struct cxl_mem - A CXL memory device - * @pdev: The PCI device associated with this CXL device. - * @regs: IO mappings to the device's MMIO - * @status_regs: CXL 2.0 8.2.8.3 Device Status Registers - * @mbox_regs: CXL 2.0 8.2.8.4 Mailbox Registers - * @memdev_regs: CXL 2.0 8.2.8.5 Memory Device Registers - * @payload_size: Size of space for payload - * (CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register) - * @mbox_mutex: Mutex to synchronize mailbox access. - * @firmware_version: Firmware version for the memory device. - * @enabled_commands: Hardware commands found enabled in CEL. - * @pmem_range: Persistent memory capacity information. - * @ram_range: Volatile memory capacity information. + * struct cxl_decoder - CXL address range decode configuration + * @dev: this decoder's device + * @id: kernel device name id + * @range: address range considered by this decoder + * @interleave_ways: number of cxl_dports in this decode + * @interleave_granularity: data stride per dport + * @target_type: accelerator vs expander (type2 vs type3) selector + * @flags: memory type capabilities and locking + * @target: active ordered target list in current decoder configuration */ -struct cxl_mem { - struct pci_dev *pdev; - void __iomem *regs; - struct cxl_memdev *cxlmd; +struct cxl_decoder { + struct device dev; + int id; + struct range range; + int interleave_ways; + int interleave_granularity; + enum cxl_decoder_type target_type; + unsigned long flags; + struct cxl_dport *target[]; +}; - void __iomem *status_regs; - void __iomem *mbox_regs; - void __iomem *memdev_regs; - size_t payload_size; - struct mutex mbox_mutex; /* Protects device mailbox and firmware */ - char firmware_version[0x10]; - unsigned long *enabled_cmds; +enum cxl_nvdimm_brige_state { + CXL_NVB_NEW, + CXL_NVB_DEAD, + CXL_NVB_ONLINE, + CXL_NVB_OFFLINE, +}; - struct range pmem_range; - struct range ram_range; +struct cxl_nvdimm_bridge { + struct device dev; + struct cxl_port *port; + struct nvdimm_bus *nvdimm_bus; + struct nvdimm_bus_descriptor nd_desc; + struct work_struct state_work; + enum cxl_nvdimm_brige_state state; }; +struct cxl_nvdimm { + struct device dev; + struct cxl_memdev *cxlmd; + struct nvdimm *nvdimm; +}; + +/** + * struct cxl_port - logical collection of upstream port devices and + * downstream port devices to construct a CXL memory + * decode hierarchy. + * @dev: this port's device + * @uport: PCI or platform device implementing the upstream port capability + * @id: id for port device-name + * @dports: cxl_dport instances referenced by decoders + * @decoder_ida: allocator for decoder ids + * @component_reg_phys: component register capability base address (optional) + */ +struct cxl_port { + struct device dev; + struct device *uport; + int id; + struct list_head dports; + struct ida decoder_ida; + resource_size_t component_reg_phys; +}; + +/** + * struct cxl_dport - CXL downstream port + * @dport: PCI bridge or firmware device representing the downstream link + * @port_id: unique hardware identifier for dport in decoder target list + * @component_reg_phys: downstream port component registers + * @port: reference to cxl_port that contains this downstream port + * @list: node for a cxl_port's list of cxl_dport instances + */ +struct cxl_dport { + struct device *dport; + int port_id; + resource_size_t component_reg_phys; + struct cxl_port *port; + struct list_head list; +}; + +struct cxl_port *to_cxl_port(struct device *dev); +struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport, + resource_size_t component_reg_phys, + struct cxl_port *parent_port); + +int cxl_add_dport(struct cxl_port *port, struct device *dport, int port_id, + resource_size_t component_reg_phys); + +struct cxl_decoder *to_cxl_decoder(struct device *dev); +bool is_root_decoder(struct device *dev); +struct cxl_decoder * +devm_cxl_add_decoder(struct device *host, struct cxl_port *port, int nr_targets, + resource_size_t base, resource_size_t len, + int interleave_ways, int interleave_granularity, + enum cxl_decoder_type type, unsigned long flags); + +/* + * Per the CXL specification (8.2.5.12 CXL HDM Decoder Capability Structure) + * single ported host-bridges need not publish a decoder capability when a + * passthrough decode can be assumed, i.e. all transactions that the uport sees + * are claimed and passed to the single dport. Default the range a 0-base + * 0-length until the first CXL region is activated. + */ +static inline struct cxl_decoder * +devm_cxl_add_passthrough_decoder(struct device *host, struct cxl_port *port) +{ + return devm_cxl_add_decoder(host, port, 1, 0, 0, 1, PAGE_SIZE, + CXL_DECODER_EXPANDER, 0); +} + extern struct bus_type cxl_bus_type; + +struct cxl_driver { + const char *name; + int (*probe)(struct device *dev); + void (*remove)(struct device *dev); + struct device_driver drv; + int id; +}; + +static inline struct cxl_driver *to_cxl_drv(struct device_driver *drv) +{ + return container_of(drv, struct cxl_driver, drv); +} + +int __cxl_driver_register(struct cxl_driver *cxl_drv, struct module *owner, + const char *modname); +#define cxl_driver_register(x) __cxl_driver_register(x, THIS_MODULE, KBUILD_MODNAME) +void cxl_driver_unregister(struct cxl_driver *cxl_drv); + +#define CXL_DEVICE_NVDIMM_BRIDGE 1 +#define CXL_DEVICE_NVDIMM 2 + +#define MODULE_ALIAS_CXL(type) MODULE_ALIAS("cxl:t" __stringify(type) "*") +#define CXL_MODALIAS_FMT "cxl:t%d" + +struct cxl_nvdimm_bridge *to_cxl_nvdimm_bridge(struct device *dev); +struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host, + struct cxl_port *port); +struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev); +bool is_cxl_nvdimm(struct device *dev); +int devm_cxl_add_nvdimm(struct device *host, struct cxl_memdev *cxlmd); #endif /* __CXL_H__ */ diff --git a/drivers/cxl/mem.h b/drivers/cxl/mem.h new file mode 100644 index 000000000000..8f02d02b26b4 --- /dev/null +++ b/drivers/cxl/mem.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2020-2021 Intel Corporation. */ +#ifndef __CXL_MEM_H__ +#define __CXL_MEM_H__ +#include <linux/cdev.h> +#include "cxl.h" + +/* CXL 2.0 8.2.8.5.1.1 Memory Device Status Register */ +#define CXLMDEV_STATUS_OFFSET 0x0 +#define CXLMDEV_DEV_FATAL BIT(0) +#define CXLMDEV_FW_HALT BIT(1) +#define CXLMDEV_STATUS_MEDIA_STATUS_MASK GENMASK(3, 2) +#define CXLMDEV_MS_NOT_READY 0 +#define CXLMDEV_MS_READY 1 +#define CXLMDEV_MS_ERROR 2 +#define CXLMDEV_MS_DISABLED 3 +#define CXLMDEV_READY(status) \ + (FIELD_GET(CXLMDEV_STATUS_MEDIA_STATUS_MASK, status) == \ + CXLMDEV_MS_READY) +#define CXLMDEV_MBOX_IF_READY BIT(4) +#define CXLMDEV_RESET_NEEDED_MASK GENMASK(7, 5) +#define CXLMDEV_RESET_NEEDED_NOT 0 +#define CXLMDEV_RESET_NEEDED_COLD 1 +#define CXLMDEV_RESET_NEEDED_WARM 2 +#define CXLMDEV_RESET_NEEDED_HOT 3 +#define CXLMDEV_RESET_NEEDED_CXL 4 +#define CXLMDEV_RESET_NEEDED(status) \ + (FIELD_GET(CXLMDEV_RESET_NEEDED_MASK, status) != \ + CXLMDEV_RESET_NEEDED_NOT) + +/* + * An entire PCI topology full of devices should be enough for any + * config + */ +#define CXL_MEM_MAX_DEVS 65536 + +/** + * struct cxl_memdev - CXL bus object representing a Type-3 Memory Device + * @dev: driver core device object + * @cdev: char dev core object for ioctl operations + * @cxlm: pointer to the parent device driver data + * @id: id number of this memdev instance. + */ +struct cxl_memdev { + struct device dev; + struct cdev cdev; + struct cxl_mem *cxlm; + int id; +}; + +/** + * struct cxl_mem - A CXL memory device + * @pdev: The PCI device associated with this CXL device. + * @cxlmd: Logical memory device chardev / interface + * @regs: Parsed register blocks + * @payload_size: Size of space for payload + * (CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register) + * @lsa_size: Size of Label Storage Area + * (CXL 2.0 8.2.9.5.1.1 Identify Memory Device) + * @mbox_mutex: Mutex to synchronize mailbox access. + * @firmware_version: Firmware version for the memory device. + * @enabled_cmds: Hardware commands found enabled in CEL. + * @pmem_range: Persistent memory capacity information. + * @ram_range: Volatile memory capacity information. + */ +struct cxl_mem { + struct pci_dev *pdev; + struct cxl_memdev *cxlmd; + + struct cxl_regs regs; + + size_t payload_size; + size_t lsa_size; + struct mutex mbox_mutex; /* Protects device mailbox and firmware */ + char firmware_version[0x10]; + unsigned long *enabled_cmds; + + struct range pmem_range; + struct range ram_range; +}; +#endif /* __CXL_MEM_H__ */ diff --git a/drivers/cxl/mem.c b/drivers/cxl/pci.c index 2acc6173da36..4cf351a3cf99 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/pci.c @@ -6,6 +6,7 @@ #include <linux/module.h> #include <linux/sizes.h> #include <linux/mutex.h> +#include <linux/list.h> #include <linux/cdev.h> #include <linux/idr.h> #include <linux/pci.h> @@ -13,12 +14,14 @@ #include <linux/io-64-nonatomic-lo-hi.h> #include "pci.h" #include "cxl.h" +#include "mem.h" /** - * DOC: cxl mem + * DOC: cxl pci * - * This implements a CXL memory device ("type-3") as it is defined by the - * Compute Express Link specification. + * This implements the PCI exclusive functionality for a CXL device as it is + * defined by the Compute Express Link specification. CXL devices may surface + * certain functionality even if it isn't CXL enabled. * * The driver has several responsibilities, mainly: * - Create the memX device and register on the CXL bus. @@ -26,18 +29,10 @@ * - Probe the device attributes to establish sysfs interface. * - Provide an IOCTL interface to userspace to communicate with the device for * things like firmware update. - * - Support management of interleave sets. - * - Handle and manage error conditions. */ -/* - * An entire PCI topology full of devices should be enough for any - * config - */ -#define CXL_MEM_MAX_DEVS 65536 - #define cxl_doorbell_busy(cxlm) \ - (readl((cxlm)->mbox_regs + CXLDEV_MBOX_CTRL_OFFSET) & \ + (readl((cxlm)->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET) & \ CXLDEV_MBOX_CTRL_DOORBELL) /* CXL 2.0 - 8.2.8.4 */ @@ -56,7 +51,14 @@ enum opcode { CXL_MBOX_OP_GET_LSA = 0x4102, CXL_MBOX_OP_SET_LSA = 0x4103, CXL_MBOX_OP_GET_HEALTH_INFO = 0x4200, + CXL_MBOX_OP_GET_ALERT_CONFIG = 0x4201, + CXL_MBOX_OP_SET_ALERT_CONFIG = 0x4202, + CXL_MBOX_OP_GET_SHUTDOWN_STATE = 0x4203, CXL_MBOX_OP_SET_SHUTDOWN_STATE = 0x4204, + CXL_MBOX_OP_GET_POISON = 0x4300, + CXL_MBOX_OP_INJECT_POISON = 0x4301, + CXL_MBOX_OP_CLEAR_POISON = 0x4302, + CXL_MBOX_OP_GET_SCAN_MEDIA_CAPS = 0x4303, CXL_MBOX_OP_SCAN_MEDIA = 0x4304, CXL_MBOX_OP_GET_SCAN_MEDIA = 0x4305, CXL_MBOX_OP_MAX = 0x10000 @@ -92,20 +94,6 @@ struct mbox_cmd { #define CXL_MBOX_SUCCESS 0 }; -/** - * struct cxl_memdev - CXL bus object representing a Type-3 Memory Device - * @dev: driver core device object - * @cdev: char dev core object for ioctl operations - * @cxlm: pointer to the parent device driver data - * @id: id number of this memdev instance. - */ -struct cxl_memdev { - struct device dev; - struct cdev cdev; - struct cxl_mem *cxlm; - int id; -}; - static int cxl_mem_major; static DEFINE_IDA(cxl_memdev_ida); static DECLARE_RWSEM(cxl_memdev_rwsem); @@ -178,6 +166,18 @@ static struct cxl_mem_command mem_commands[CXL_MEM_COMMAND_ID_MAX] = { CXL_CMD(GET_LSA, 0x8, ~0, 0), CXL_CMD(GET_HEALTH_INFO, 0, 0x12, 0), CXL_CMD(GET_LOG, 0x18, ~0, CXL_CMD_FLAG_FORCE_ENABLE), + CXL_CMD(SET_PARTITION_INFO, 0x0a, 0, 0), + CXL_CMD(SET_LSA, ~0, 0, 0), + CXL_CMD(GET_ALERT_CONFIG, 0, 0x10, 0), + CXL_CMD(SET_ALERT_CONFIG, 0xc, 0, 0), + CXL_CMD(GET_SHUTDOWN_STATE, 0, 0x1, 0), + CXL_CMD(SET_SHUTDOWN_STATE, 0x1, 0, 0), + CXL_CMD(GET_POISON, 0x10, ~0, 0), + CXL_CMD(INJECT_POISON, 0x8, 0, 0), + CXL_CMD(CLEAR_POISON, 0x48, 0, 0), + CXL_CMD(GET_SCAN_MEDIA_CAPS, 0x10, 0x4, 0), + CXL_CMD(SCAN_MEDIA, 0x11, 0, 0), + CXL_CMD(GET_SCAN_MEDIA, 0, ~0, 0), }; /* @@ -292,7 +292,7 @@ static void cxl_mem_mbox_timeout(struct cxl_mem *cxlm, static int __cxl_mem_mbox_send_cmd(struct cxl_mem *cxlm, struct mbox_cmd *mbox_cmd) { - void __iomem *payload = cxlm->mbox_regs + CXLDEV_MBOX_PAYLOAD_OFFSET; + void __iomem *payload = cxlm->regs.mbox + CXLDEV_MBOX_PAYLOAD_OFFSET; u64 cmd_reg, status_reg; size_t out_len; int rc; @@ -335,12 +335,12 @@ static int __cxl_mem_mbox_send_cmd(struct cxl_mem *cxlm, } /* #2, #3 */ - writeq(cmd_reg, cxlm->mbox_regs + CXLDEV_MBOX_CMD_OFFSET); + writeq(cmd_reg, cxlm->regs.mbox + CXLDEV_MBOX_CMD_OFFSET); /* #4 */ dev_dbg(&cxlm->pdev->dev, "Sending command\n"); writel(CXLDEV_MBOX_CTRL_DOORBELL, - cxlm->mbox_regs + CXLDEV_MBOX_CTRL_OFFSET); + cxlm->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET); /* #5 */ rc = cxl_mem_wait_for_doorbell(cxlm); @@ -350,7 +350,7 @@ static int __cxl_mem_mbox_send_cmd(struct cxl_mem *cxlm, } /* #6 */ - status_reg = readq(cxlm->mbox_regs + CXLDEV_MBOX_STATUS_OFFSET); + status_reg = readq(cxlm->regs.mbox + CXLDEV_MBOX_STATUS_OFFSET); mbox_cmd->return_code = FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg); @@ -360,7 +360,7 @@ static int __cxl_mem_mbox_send_cmd(struct cxl_mem *cxlm, } /* #7 */ - cmd_reg = readq(cxlm->mbox_regs + CXLDEV_MBOX_CMD_OFFSET); + cmd_reg = readq(cxlm->regs.mbox + CXLDEV_MBOX_CMD_OFFSET); out_len = FIELD_GET(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK, cmd_reg); /* #8 */ @@ -421,7 +421,7 @@ static int cxl_mem_mbox_get(struct cxl_mem *cxlm) goto out; } - md_status = readq(cxlm->memdev_regs + CXLMDEV_STATUS_OFFSET); + md_status = readq(cxlm->regs.memdev + CXLMDEV_STATUS_OFFSET); if (!(md_status & CXLMDEV_MBOX_IF_READY && CXLMDEV_READY(md_status))) { dev_err(dev, "mbox: reported doorbell ready, but not mbox ready\n"); rc = -EBUSY; @@ -890,75 +890,9 @@ static int cxl_mem_mbox_send_cmd(struct cxl_mem *cxlm, u16 opcode, return 0; } -/** - * cxl_mem_setup_regs() - Setup necessary MMIO. - * @cxlm: The CXL memory device to communicate with. - * - * Return: 0 if all necessary registers mapped. - * - * A memory device is required by spec to implement a certain set of MMIO - * regions. The purpose of this function is to enumerate and map those - * registers. - */ -static int cxl_mem_setup_regs(struct cxl_mem *cxlm) -{ - struct device *dev = &cxlm->pdev->dev; - int cap, cap_count; - u64 cap_array; - - cap_array = readq(cxlm->regs + CXLDEV_CAP_ARRAY_OFFSET); - if (FIELD_GET(CXLDEV_CAP_ARRAY_ID_MASK, cap_array) != - CXLDEV_CAP_ARRAY_CAP_ID) - return -ENODEV; - - cap_count = FIELD_GET(CXLDEV_CAP_ARRAY_COUNT_MASK, cap_array); - - for (cap = 1; cap <= cap_count; cap++) { - void __iomem *register_block; - u32 offset; - u16 cap_id; - - cap_id = FIELD_GET(CXLDEV_CAP_HDR_CAP_ID_MASK, - readl(cxlm->regs + cap * 0x10)); - offset = readl(cxlm->regs + cap * 0x10 + 0x4); - register_block = cxlm->regs + offset; - - switch (cap_id) { - case CXLDEV_CAP_CAP_ID_DEVICE_STATUS: - dev_dbg(dev, "found Status capability (0x%x)\n", offset); - cxlm->status_regs = register_block; - break; - case CXLDEV_CAP_CAP_ID_PRIMARY_MAILBOX: - dev_dbg(dev, "found Mailbox capability (0x%x)\n", offset); - cxlm->mbox_regs = register_block; - break; - case CXLDEV_CAP_CAP_ID_SECONDARY_MAILBOX: - dev_dbg(dev, "found Secondary Mailbox capability (0x%x)\n", offset); - break; - case CXLDEV_CAP_CAP_ID_MEMDEV: - dev_dbg(dev, "found Memory Device capability (0x%x)\n", offset); - cxlm->memdev_regs = register_block; - break; - default: - dev_dbg(dev, "Unknown cap ID: %d (0x%x)\n", cap_id, offset); - break; - } - } - - if (!cxlm->status_regs || !cxlm->mbox_regs || !cxlm->memdev_regs) { - dev_err(dev, "registers not found: %s%s%s\n", - !cxlm->status_regs ? "status " : "", - !cxlm->mbox_regs ? "mbox " : "", - !cxlm->memdev_regs ? "memdev" : ""); - return -ENXIO; - } - - return 0; -} - static int cxl_mem_setup_mailbox(struct cxl_mem *cxlm) { - const int cap = readl(cxlm->mbox_regs + CXLDEV_MBOX_CAPS_OFFSET); + const int cap = readl(cxlm->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET); cxlm->payload_size = 1 << FIELD_GET(CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK, cap); @@ -983,55 +917,62 @@ static int cxl_mem_setup_mailbox(struct cxl_mem *cxlm) return 0; } -static struct cxl_mem *cxl_mem_create(struct pci_dev *pdev, u32 reg_lo, - u32 reg_hi) +static struct cxl_mem *cxl_mem_create(struct pci_dev *pdev) { struct device *dev = &pdev->dev; struct cxl_mem *cxlm; - void __iomem *regs; - u64 offset; - u8 bar; - int rc; - cxlm = devm_kzalloc(&pdev->dev, sizeof(*cxlm), GFP_KERNEL); + cxlm = devm_kzalloc(dev, sizeof(*cxlm), GFP_KERNEL); if (!cxlm) { dev_err(dev, "No memory available\n"); - return NULL; - } - - offset = ((u64)reg_hi << 32) | (reg_lo & CXL_REGLOC_ADDR_MASK); - bar = FIELD_GET(CXL_REGLOC_BIR_MASK, reg_lo); - - /* Basic sanity check that BAR is big enough */ - if (pci_resource_len(pdev, bar) < offset) { - dev_err(dev, "BAR%d: %pr: too small (offset: %#llx)\n", bar, - &pdev->resource[bar], (unsigned long long)offset); - return NULL; - } - - rc = pcim_iomap_regions(pdev, BIT(bar), pci_name(pdev)); - if (rc) { - dev_err(dev, "failed to map registers\n"); - return NULL; + return ERR_PTR(-ENOMEM); } - regs = pcim_iomap_table(pdev)[bar]; mutex_init(&cxlm->mbox_mutex); cxlm->pdev = pdev; - cxlm->regs = regs + offset; cxlm->enabled_cmds = devm_kmalloc_array(dev, BITS_TO_LONGS(cxl_cmd_count), sizeof(unsigned long), GFP_KERNEL | __GFP_ZERO); if (!cxlm->enabled_cmds) { dev_err(dev, "No memory available for bitmap\n"); - return NULL; + return ERR_PTR(-ENOMEM); } - dev_dbg(dev, "Mapped CXL Memory Device resource\n"); return cxlm; } +static void __iomem *cxl_mem_map_regblock(struct cxl_mem *cxlm, + u8 bar, u64 offset) +{ + struct pci_dev *pdev = cxlm->pdev; + struct device *dev = &pdev->dev; + void __iomem *addr; + + /* Basic sanity check that BAR is big enough */ + if (pci_resource_len(pdev, bar) < offset) { + dev_err(dev, "BAR%d: %pr: too small (offset: %#llx)\n", bar, + &pdev->resource[bar], (unsigned long long)offset); + return IOMEM_ERR_PTR(-ENXIO); + } + + addr = pci_iomap(pdev, bar, 0); + if (!addr) { + dev_err(dev, "failed to map registers\n"); + return addr; + } + + dev_dbg(dev, "Mapped CXL Memory Device resource bar %u @ %#llx\n", + bar, offset); + + return addr; +} + +static void cxl_mem_unmap_regblock(struct cxl_mem *cxlm, void __iomem *base) +{ + pci_iounmap(cxlm->pdev, base); +} + static int cxl_mem_dvsec(struct pci_dev *pdev, int dvsec) { int pos; @@ -1055,6 +996,171 @@ static int cxl_mem_dvsec(struct pci_dev *pdev, int dvsec) return 0; } +static int cxl_probe_regs(struct cxl_mem *cxlm, void __iomem *base, + struct cxl_register_map *map) +{ + struct pci_dev *pdev = cxlm->pdev; + struct device *dev = &pdev->dev; + struct cxl_component_reg_map *comp_map; + struct cxl_device_reg_map *dev_map; + + switch (map->reg_type) { + case CXL_REGLOC_RBI_COMPONENT: + comp_map = &map->component_map; + cxl_probe_component_regs(dev, base, comp_map); + if (!comp_map->hdm_decoder.valid) { + dev_err(dev, "HDM decoder registers not found\n"); + return -ENXIO; + } + + dev_dbg(dev, "Set up component registers\n"); + break; + case CXL_REGLOC_RBI_MEMDEV: + dev_map = &map->device_map; + cxl_probe_device_regs(dev, base, dev_map); + if (!dev_map->status.valid || !dev_map->mbox.valid || + !dev_map->memdev.valid) { + dev_err(dev, "registers not found: %s%s%s\n", + !dev_map->status.valid ? "status " : "", + !dev_map->mbox.valid ? "status " : "", + !dev_map->memdev.valid ? "status " : ""); + return -ENXIO; + } + + dev_dbg(dev, "Probing device registers...\n"); + break; + default: + break; + } + + return 0; +} + +static int cxl_map_regs(struct cxl_mem *cxlm, struct cxl_register_map *map) +{ + struct pci_dev *pdev = cxlm->pdev; + struct device *dev = &pdev->dev; + + switch (map->reg_type) { + case CXL_REGLOC_RBI_COMPONENT: + cxl_map_component_regs(pdev, &cxlm->regs.component, map); + dev_dbg(dev, "Mapping component registers...\n"); + break; + case CXL_REGLOC_RBI_MEMDEV: + cxl_map_device_regs(pdev, &cxlm->regs.device_regs, map); + dev_dbg(dev, "Probing device registers...\n"); + break; + default: + break; + } + + return 0; +} + +static void cxl_decode_register_block(u32 reg_lo, u32 reg_hi, + u8 *bar, u64 *offset, u8 *reg_type) +{ + *offset = ((u64)reg_hi << 32) | (reg_lo & CXL_REGLOC_ADDR_MASK); + *bar = FIELD_GET(CXL_REGLOC_BIR_MASK, reg_lo); + *reg_type = FIELD_GET(CXL_REGLOC_RBI_MASK, reg_lo); +} + +/** + * cxl_mem_setup_regs() - Setup necessary MMIO. + * @cxlm: The CXL memory device to communicate with. + * + * Return: 0 if all necessary registers mapped. + * + * A memory device is required by spec to implement a certain set of MMIO + * regions. The purpose of this function is to enumerate and map those + * registers. + */ +static int cxl_mem_setup_regs(struct cxl_mem *cxlm) +{ + struct pci_dev *pdev = cxlm->pdev; + struct device *dev = &pdev->dev; + u32 regloc_size, regblocks; + void __iomem *base; + int regloc, i; + struct cxl_register_map *map, *n; + LIST_HEAD(register_maps); + int ret = 0; + + regloc = cxl_mem_dvsec(pdev, PCI_DVSEC_ID_CXL_REGLOC_DVSEC_ID); + if (!regloc) { + dev_err(dev, "register location dvsec not found\n"); + return -ENXIO; + } + + if (pci_request_mem_regions(pdev, pci_name(pdev))) + return -ENODEV; + + /* Get the size of the Register Locator DVSEC */ + pci_read_config_dword(pdev, regloc + PCI_DVSEC_HEADER1, ®loc_size); + regloc_size = FIELD_GET(PCI_DVSEC_HEADER1_LENGTH_MASK, regloc_size); + + regloc += PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET; + regblocks = (regloc_size - PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET) / 8; + + for (i = 0; i < regblocks; i++, regloc += 8) { + u32 reg_lo, reg_hi; + u8 reg_type; + u64 offset; + u8 bar; + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (!map) { + ret = -ENOMEM; + goto free_maps; + } + + list_add(&map->list, ®ister_maps); + + pci_read_config_dword(pdev, regloc, ®_lo); + pci_read_config_dword(pdev, regloc + 4, ®_hi); + + cxl_decode_register_block(reg_lo, reg_hi, &bar, &offset, + ®_type); + + dev_dbg(dev, "Found register block in bar %u @ 0x%llx of type %u\n", + bar, offset, reg_type); + + base = cxl_mem_map_regblock(cxlm, bar, offset); + if (!base) { + ret = -ENOMEM; + goto free_maps; + } + + map->barno = bar; + map->block_offset = offset; + map->reg_type = reg_type; + + ret = cxl_probe_regs(cxlm, base + offset, map); + + /* Always unmap the regblock regardless of probe success */ + cxl_mem_unmap_regblock(cxlm, base); + + if (ret) + goto free_maps; + } + + pci_release_mem_regions(pdev); + + list_for_each_entry(map, ®ister_maps, list) { + ret = cxl_map_regs(cxlm, map); + if (ret) + goto free_maps; + } + +free_maps: + list_for_each_entry_safe(map, n, ®ister_maps, list) { + list_del(&map->list); + kfree(map); + } + + return ret; +} + static struct cxl_memdev *to_cxl_memdev(struct device *dev) { return container_of(dev, struct cxl_memdev, dev); @@ -1094,6 +1200,16 @@ static ssize_t payload_max_show(struct device *dev, } static DEVICE_ATTR_RO(payload_max); +static ssize_t label_storage_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cxl_memdev *cxlmd = to_cxl_memdev(dev); + struct cxl_mem *cxlm = cxlmd->cxlm; + + return sysfs_emit(buf, "%zu\n", cxlm->lsa_size); +} +static DEVICE_ATTR_RO(label_storage_size); + static ssize_t ram_size_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1123,6 +1239,7 @@ static struct device_attribute dev_attr_pmem_size = static struct attribute *cxl_memdev_attributes[] = { &dev_attr_firmware_version.attr, &dev_attr_payload_max.attr, + &dev_attr_label_storage_size.attr, NULL, }; @@ -1215,7 +1332,8 @@ err: return ERR_PTR(rc); } -static int cxl_mem_add_memdev(struct cxl_mem *cxlm) +static struct cxl_memdev *devm_cxl_add_memdev(struct device *host, + struct cxl_mem *cxlm) { struct cxl_memdev *cxlmd; struct device *dev; @@ -1224,7 +1342,7 @@ static int cxl_mem_add_memdev(struct cxl_mem *cxlm) cxlmd = cxl_memdev_alloc(cxlm); if (IS_ERR(cxlmd)) - return PTR_ERR(cxlmd); + return cxlmd; dev = &cxlmd->dev; rc = dev_set_name(dev, "mem%d", cxlmd->id); @@ -1242,8 +1360,10 @@ static int cxl_mem_add_memdev(struct cxl_mem *cxlm) if (rc) goto err; - return devm_add_action_or_reset(dev->parent, cxl_memdev_unregister, - cxlmd); + rc = devm_add_action_or_reset(host, cxl_memdev_unregister, cxlmd); + if (rc) + return ERR_PTR(rc); + return cxlmd; err: /* @@ -1252,7 +1372,7 @@ err: */ cxl_memdev_shutdown(cxlmd); put_device(dev); - return rc; + return ERR_PTR(rc); } static int cxl_xfer_log(struct cxl_mem *cxlm, uuid_t *uuid, u32 size, u8 *out) @@ -1455,6 +1575,7 @@ static int cxl_mem_identify(struct cxl_mem *cxlm) cxlm->pmem_range.end = le64_to_cpu(id.persistent_capacity) * SZ_256M - 1; + cxlm->lsa_size = le32_to_cpu(id.lsa_size); memcpy(cxlm->firmware_version, id.fw_revision, sizeof(id.fw_revision)); return 0; @@ -1462,46 +1583,17 @@ static int cxl_mem_identify(struct cxl_mem *cxlm) static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id) { - struct device *dev = &pdev->dev; - struct cxl_mem *cxlm = NULL; - u32 regloc_size, regblocks; - int rc, regloc, i; + struct cxl_memdev *cxlmd; + struct cxl_mem *cxlm; + int rc; rc = pcim_enable_device(pdev); if (rc) return rc; - regloc = cxl_mem_dvsec(pdev, PCI_DVSEC_ID_CXL_REGLOC_OFFSET); - if (!regloc) { - dev_err(dev, "register location dvsec not found\n"); - return -ENXIO; - } - - /* Get the size of the Register Locator DVSEC */ - pci_read_config_dword(pdev, regloc + PCI_DVSEC_HEADER1, ®loc_size); - regloc_size = FIELD_GET(PCI_DVSEC_HEADER1_LENGTH_MASK, regloc_size); - - regloc += PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET; - regblocks = (regloc_size - PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET) / 8; - - for (i = 0; i < regblocks; i++, regloc += 8) { - u32 reg_lo, reg_hi; - u8 reg_type; - - /* "register low and high" contain other bits */ - pci_read_config_dword(pdev, regloc, ®_lo); - pci_read_config_dword(pdev, regloc + 4, ®_hi); - - reg_type = FIELD_GET(CXL_REGLOC_RBI_MASK, reg_lo); - - if (reg_type == CXL_REGLOC_RBI_MEMDEV) { - cxlm = cxl_mem_create(pdev, reg_lo, reg_hi); - break; - } - } - - if (!cxlm) - return -ENODEV; + cxlm = cxl_mem_create(pdev); + if (IS_ERR(cxlm)) + return PTR_ERR(cxlm); rc = cxl_mem_setup_regs(cxlm); if (rc) @@ -1519,7 +1611,14 @@ static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) return rc; - return cxl_mem_add_memdev(cxlm); + cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlm); + if (IS_ERR(cxlmd)) + return PTR_ERR(cxlmd); + + if (range_len(&cxlm->pmem_range) && IS_ENABLED(CONFIG_CXL_PMEM)) + rc = devm_cxl_add_nvdimm(&pdev->dev, cxlmd); + + return rc; } static const struct pci_device_id cxl_mem_pci_tbl[] = { @@ -1544,6 +1643,10 @@ static __init int cxl_mem_init(void) dev_t devt; int rc; + /* Double check the anonymous union trickery in struct cxl_regs */ + BUILD_BUG_ON(offsetof(struct cxl_regs, memdev) != + offsetof(struct cxl_regs, device_regs.memdev)); + rc = alloc_chrdev_region(&devt, 0, CXL_MEM_MAX_DEVS, "cxl"); if (rc) return rc; diff --git a/drivers/cxl/pci.h b/drivers/cxl/pci.h index af3ec078cf6c..dad7a831f65f 100644 --- a/drivers/cxl/pci.h +++ b/drivers/cxl/pci.h @@ -13,7 +13,7 @@ #define PCI_DVSEC_VENDOR_ID_CXL 0x1E98 #define PCI_DVSEC_ID_CXL 0x0 -#define PCI_DVSEC_ID_CXL_REGLOC_OFFSET 0x8 +#define PCI_DVSEC_ID_CXL_REGLOC_DVSEC_ID 0x8 #define PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET 0xC /* BAR Indicator Register (BIR) */ diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c new file mode 100644 index 000000000000..0088e41dd2f3 --- /dev/null +++ b/drivers/cxl/pmem.c @@ -0,0 +1,230 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2021 Intel Corporation. All rights reserved. */ +#include <linux/libnvdimm.h> +#include <linux/device.h> +#include <linux/module.h> +#include <linux/ndctl.h> +#include <linux/async.h> +#include <linux/slab.h> +#include "mem.h" +#include "cxl.h" + +/* + * Ordered workqueue for cxl nvdimm device arrival and departure + * to coordinate bus rescans when a bridge arrives and trigger remove + * operations when the bridge is removed. + */ +static struct workqueue_struct *cxl_pmem_wq; + +static void unregister_nvdimm(void *nvdimm) +{ + nvdimm_delete(nvdimm); +} + +static int match_nvdimm_bridge(struct device *dev, const void *data) +{ + return strcmp(dev_name(dev), "nvdimm-bridge") == 0; +} + +static struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(void) +{ + struct device *dev; + + dev = bus_find_device(&cxl_bus_type, NULL, NULL, match_nvdimm_bridge); + if (!dev) + return NULL; + return to_cxl_nvdimm_bridge(dev); +} + +static int cxl_nvdimm_probe(struct device *dev) +{ + struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev); + struct cxl_nvdimm_bridge *cxl_nvb; + unsigned long flags = 0; + struct nvdimm *nvdimm; + int rc = -ENXIO; + + cxl_nvb = cxl_find_nvdimm_bridge(); + if (!cxl_nvb) + return -ENXIO; + + device_lock(&cxl_nvb->dev); + if (!cxl_nvb->nvdimm_bus) + goto out; + + set_bit(NDD_LABELING, &flags); + nvdimm = nvdimm_create(cxl_nvb->nvdimm_bus, cxl_nvd, NULL, flags, 0, 0, + NULL); + if (!nvdimm) + goto out; + + rc = devm_add_action_or_reset(dev, unregister_nvdimm, nvdimm); +out: + device_unlock(&cxl_nvb->dev); + put_device(&cxl_nvb->dev); + + return rc; +} + +static struct cxl_driver cxl_nvdimm_driver = { + .name = "cxl_nvdimm", + .probe = cxl_nvdimm_probe, + .id = CXL_DEVICE_NVDIMM, +}; + +static int cxl_pmem_ctl(struct nvdimm_bus_descriptor *nd_desc, + struct nvdimm *nvdimm, unsigned int cmd, void *buf, + unsigned int buf_len, int *cmd_rc) +{ + return -ENOTTY; +} + +static bool online_nvdimm_bus(struct cxl_nvdimm_bridge *cxl_nvb) +{ + if (cxl_nvb->nvdimm_bus) + return true; + cxl_nvb->nvdimm_bus = + nvdimm_bus_register(&cxl_nvb->dev, &cxl_nvb->nd_desc); + return cxl_nvb->nvdimm_bus != NULL; +} + +static int cxl_nvdimm_release_driver(struct device *dev, void *data) +{ + if (!is_cxl_nvdimm(dev)) + return 0; + device_release_driver(dev); + return 0; +} + +static void offline_nvdimm_bus(struct nvdimm_bus *nvdimm_bus) +{ + if (!nvdimm_bus) + return; + + /* + * Set the state of cxl_nvdimm devices to unbound / idle before + * nvdimm_bus_unregister() rips the nvdimm objects out from + * underneath them. + */ + bus_for_each_dev(&cxl_bus_type, NULL, NULL, cxl_nvdimm_release_driver); + nvdimm_bus_unregister(nvdimm_bus); +} + +static void cxl_nvb_update_state(struct work_struct *work) +{ + struct cxl_nvdimm_bridge *cxl_nvb = + container_of(work, typeof(*cxl_nvb), state_work); + struct nvdimm_bus *victim_bus = NULL; + bool release = false, rescan = false; + + device_lock(&cxl_nvb->dev); + switch (cxl_nvb->state) { + case CXL_NVB_ONLINE: + if (!online_nvdimm_bus(cxl_nvb)) { + dev_err(&cxl_nvb->dev, + "failed to establish nvdimm bus\n"); + release = true; + } else + rescan = true; + break; + case CXL_NVB_OFFLINE: + case CXL_NVB_DEAD: + victim_bus = cxl_nvb->nvdimm_bus; + cxl_nvb->nvdimm_bus = NULL; + break; + default: + break; + } + device_unlock(&cxl_nvb->dev); + + if (release) + device_release_driver(&cxl_nvb->dev); + if (rescan) { + int rc = bus_rescan_devices(&cxl_bus_type); + + dev_dbg(&cxl_nvb->dev, "rescan: %d\n", rc); + } + offline_nvdimm_bus(victim_bus); + + put_device(&cxl_nvb->dev); +} + +static void cxl_nvdimm_bridge_remove(struct device *dev) +{ + struct cxl_nvdimm_bridge *cxl_nvb = to_cxl_nvdimm_bridge(dev); + + if (cxl_nvb->state == CXL_NVB_ONLINE) + cxl_nvb->state = CXL_NVB_OFFLINE; + if (queue_work(cxl_pmem_wq, &cxl_nvb->state_work)) + get_device(&cxl_nvb->dev); +} + +static int cxl_nvdimm_bridge_probe(struct device *dev) +{ + struct cxl_nvdimm_bridge *cxl_nvb = to_cxl_nvdimm_bridge(dev); + + if (cxl_nvb->state == CXL_NVB_DEAD) + return -ENXIO; + + if (cxl_nvb->state == CXL_NVB_NEW) { + cxl_nvb->nd_desc = (struct nvdimm_bus_descriptor) { + .provider_name = "CXL", + .module = THIS_MODULE, + .ndctl = cxl_pmem_ctl, + }; + + INIT_WORK(&cxl_nvb->state_work, cxl_nvb_update_state); + } + + cxl_nvb->state = CXL_NVB_ONLINE; + if (queue_work(cxl_pmem_wq, &cxl_nvb->state_work)) + get_device(&cxl_nvb->dev); + + return 0; +} + +static struct cxl_driver cxl_nvdimm_bridge_driver = { + .name = "cxl_nvdimm_bridge", + .probe = cxl_nvdimm_bridge_probe, + .remove = cxl_nvdimm_bridge_remove, + .id = CXL_DEVICE_NVDIMM_BRIDGE, +}; + +static __init int cxl_pmem_init(void) +{ + int rc; + + cxl_pmem_wq = alloc_ordered_workqueue("cxl_pmem", 0); + if (!cxl_pmem_wq) + return -ENXIO; + + rc = cxl_driver_register(&cxl_nvdimm_bridge_driver); + if (rc) + goto err_bridge; + + rc = cxl_driver_register(&cxl_nvdimm_driver); + if (rc) + goto err_nvdimm; + + return 0; + +err_nvdimm: + cxl_driver_unregister(&cxl_nvdimm_bridge_driver); +err_bridge: + destroy_workqueue(cxl_pmem_wq); + return rc; +} + +static __exit void cxl_pmem_exit(void) +{ + cxl_driver_unregister(&cxl_nvdimm_driver); + cxl_driver_unregister(&cxl_nvdimm_bridge_driver); + destroy_workqueue(cxl_pmem_wq); +} + +MODULE_LICENSE("GPL v2"); +module_init(cxl_pmem_init); +module_exit(cxl_pmem_exit); +MODULE_IMPORT_NS(CXL); +MODULE_ALIAS_CXL(CXL_DEVICE_NVDIMM_BRIDGE); +MODULE_ALIAS_CXL(CXL_DEVICE_NVDIMM); diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 3a777d0073b7..e6aa87043a95 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -363,8 +363,13 @@ struct nvdimm_bus *nvdimm_bus_register(struct device *parent, nvdimm_bus->dev.groups = nd_desc->attr_groups; nvdimm_bus->dev.bus = &nvdimm_bus_type; nvdimm_bus->dev.of_node = nd_desc->of_node; - dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id); - rc = device_register(&nvdimm_bus->dev); + device_initialize(&nvdimm_bus->dev); + device_set_pm_not_required(&nvdimm_bus->dev); + rc = dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id); + if (rc) + goto err; + + rc = device_add(&nvdimm_bus->dev); if (rc) { dev_dbg(&nvdimm_bus->dev, "registration failed: %d\n", rc); goto err; @@ -396,21 +401,10 @@ static int child_unregister(struct device *dev, void *data) if (dev->class) return 0; - if (is_nvdimm(dev)) { - struct nvdimm *nvdimm = to_nvdimm(dev); - bool dev_put = false; - - /* We are shutting down. Make state frozen artificially. */ - nvdimm_bus_lock(dev); - set_bit(NVDIMM_SECURITY_FROZEN, &nvdimm->sec.flags); - if (test_and_clear_bit(NDD_WORK_PENDING, &nvdimm->flags)) - dev_put = true; - nvdimm_bus_unlock(dev); - cancel_delayed_work_sync(&nvdimm->dwork); - if (dev_put) - put_device(dev); - } - nd_device_unregister(dev, ND_SYNC); + if (is_nvdimm(dev)) + nvdimm_delete(to_nvdimm(dev)); + else + nd_device_unregister(dev, ND_SYNC); return 0; } @@ -536,6 +530,7 @@ void __nd_device_register(struct device *dev) set_dev_node(dev, to_nd_region(dev)->numa_node); dev->bus = &nvdimm_bus_type; + device_set_pm_not_required(dev); if (dev->parent) { get_device(dev->parent); if (dev_to_node(dev) == NUMA_NO_NODE) @@ -728,18 +723,41 @@ const struct attribute_group nd_numa_attribute_group = { .is_visible = nd_numa_attr_visible, }; +static void ndctl_release(struct device *dev) +{ + kfree(dev); +} + int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus) { dev_t devt = MKDEV(nvdimm_bus_major, nvdimm_bus->id); struct device *dev; + int rc; - dev = device_create(nd_class, &nvdimm_bus->dev, devt, nvdimm_bus, - "ndctl%d", nvdimm_bus->id); + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + device_initialize(dev); + device_set_pm_not_required(dev); + dev->class = nd_class; + dev->parent = &nvdimm_bus->dev; + dev->devt = devt; + dev->release = ndctl_release; + rc = dev_set_name(dev, "ndctl%d", nvdimm_bus->id); + if (rc) + goto err; - if (IS_ERR(dev)) - dev_dbg(&nvdimm_bus->dev, "failed to register ndctl%d: %ld\n", - nvdimm_bus->id, PTR_ERR(dev)); - return PTR_ERR_OR_ZERO(dev); + rc = device_add(dev); + if (rc) { + dev_dbg(&nvdimm_bus->dev, "failed to register ndctl%d: %d\n", + nvdimm_bus->id, rc); + goto err; + } + return 0; + +err: + put_device(dev); + return rc; } void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus) diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index 9d208570d059..dc7449a40003 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -642,6 +642,24 @@ struct nvdimm *__nvdimm_create(struct nvdimm_bus *nvdimm_bus, } EXPORT_SYMBOL_GPL(__nvdimm_create); +void nvdimm_delete(struct nvdimm *nvdimm) +{ + struct device *dev = &nvdimm->dev; + bool dev_put = false; + + /* We are shutting down. Make state frozen artificially. */ + nvdimm_bus_lock(dev); + set_bit(NVDIMM_SECURITY_FROZEN, &nvdimm->sec.flags); + if (test_and_clear_bit(NDD_WORK_PENDING, &nvdimm->flags)) + dev_put = true; + nvdimm_bus_unlock(dev); + cancel_delayed_work_sync(&nvdimm->dwork); + if (dev_put) + put_device(dev); + nd_device_unregister(dev, ND_SYNC); +} +EXPORT_SYMBOL_GPL(nvdimm_delete); + static void shutdown_security_notify(void *data) { struct nvdimm *nvdimm = data; |