diff options
author | Dave Jiang <dave.jiang@intel.com> | 2024-04-26 15:47:56 -0700 |
---|---|---|
committer | Dave Jiang <dave.jiang@intel.com> | 2024-04-29 09:03:26 -0700 |
commit | 5d211c7090590033581175d6405ae40917ca3a06 (patch) | |
tree | 3239fa3608f69ef70452b3a92b530de31b7f4434 | |
parent | e67572cd2204894179d89bd7b984072f19313b03 (diff) | |
download | lwn-5d211c7090590033581175d6405ae40917ca3a06.tar.gz lwn-5d211c7090590033581175d6405ae40917ca3a06.zip |
cxl: Fix cxl_endpoint_get_perf_coordinate() support for RCH
Robert reported the following when booting a CXL host with Restricted CXL
Host (RCH) topology:
[ 39.815379] cxl_acpi ACPI0017:00: not a cxl_port device
[ 39.827123] WARNING: CPU: 46 PID: 1754 at drivers/cxl/core/port.c:592 to_cxl_port+0x56/0x70 [cxl_core]
... plus some related subsequent NULL pointer dereference:
[ 40.718708] BUG: kernel NULL pointer dereference, address: 00000000000002d8
The iterator to walk the PCIe path did not account for RCH topology.
However RCH does not support hotplug and the memory exported by the
Restricted CXL Device (RCD) should be covered by HMAT and therefore no
access_coordinate is needed. Add check to see if the endpoint device is
RCD and skip calculation.
Also add a call to cxl_endpoint_get_perf_coordinates() in cxl_test in order
to exercise the topology iterator. The dev_is_pci() check added is to help
with this test and should be harmless for normal operation.
Reported-by: Robert Richter <rrichter@amd.com>
Closes: https://lore.kernel.org/all/Ziv8GfSMSbvlBB0h@rric.localdomain/
Fixes: 592780b8391f ("cxl: Fix retrieving of access_coordinates in PCIe path")
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Tested-by: Robert Richter <rrichter@amd.com>
Reviewed-by: Robert Richter <rrichter@amd.com>
Link: https://lore.kernel.org/r/20240426224913.1027420-1-dave.jiang@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
-rw-r--r-- | drivers/cxl/core/port.c | 15 | ||||
-rw-r--r-- | tools/testing/cxl/test/cxl.c | 7 |
2 files changed, 21 insertions, 1 deletions
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 762783bb091a..887ed6e358fb 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -2184,6 +2184,7 @@ static bool parent_port_is_cxl_root(struct cxl_port *port) int cxl_endpoint_get_perf_coordinates(struct cxl_port *port, struct access_coordinate *coord) { + struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev); struct access_coordinate c[] = { { .read_bandwidth = UINT_MAX, @@ -2197,6 +2198,7 @@ int cxl_endpoint_get_perf_coordinates(struct cxl_port *port, struct cxl_port *iter = port; struct cxl_dport *dport; struct pci_dev *pdev; + struct device *dev; unsigned int bw; bool is_cxl_root; @@ -2204,6 +2206,13 @@ int cxl_endpoint_get_perf_coordinates(struct cxl_port *port, return -EINVAL; /* + * Skip calculation for RCD. Expectation is HMAT already covers RCD case + * since RCH does not support hotplug. + */ + if (cxlmd->cxlds->rcd) + return 0; + + /* * Exit the loop when the parent port of the current iter port is cxl * root. The iterative loop starts at the endpoint and gathers the * latency of the CXL link from the current device/port to the connected @@ -2232,8 +2241,12 @@ int cxl_endpoint_get_perf_coordinates(struct cxl_port *port, return -EINVAL; cxl_coordinates_combine(c, c, dport->coord); + dev = port->uport_dev->parent; + if (!dev_is_pci(dev)) + return -ENODEV; + /* Get the calculated PCI paths bandwidth */ - pdev = to_pci_dev(port->uport_dev->parent); + pdev = to_pci_dev(dev); bw = pcie_bandwidth_available(pdev, NULL, NULL, NULL); if (bw == 0) return -ENXIO; diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 61c69297e797..3482248aa344 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -1001,6 +1001,7 @@ static void mock_cxl_endpoint_parse_cdat(struct cxl_port *port) struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev); struct cxl_dev_state *cxlds = cxlmd->cxlds; struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); + struct access_coordinate ep_c[ACCESS_COORDINATE_MAX]; struct range pmem_range = { .start = cxlds->pmem_res.start, .end = cxlds->pmem_res.end, @@ -1020,6 +1021,12 @@ static void mock_cxl_endpoint_parse_cdat(struct cxl_port *port) dpa_perf_setup(port, &pmem_range, &mds->pmem_perf); cxl_memdev_update_perf(cxlmd); + + /* + * This function is here to only test the topology iterator. It serves + * no other purpose. + */ + cxl_endpoint_get_perf_coordinates(port, ep_c); } static struct cxl_mock_ops cxl_mock_ops = { |