From 920f6468924f8dc7e0e6e1510d000888592ef861 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 30 May 2024 18:04:29 -0700 Subject: PCI: Warn on missing cfg_access_lock during secondary bus reset The recent adventure with adding lockdep tracking for cfg_access_lock, while it yielded many false positives [1], did catch a true positive in the pci_reset_bus() path [2]. So, while lockdep is difficult to deploy, open coding a check that cfg_access_lock is held during the reset is feasible. While this does not offer a full backtrace, it should be sufficient to implicate the caller of pci_bridge_secondary_bus_reset() as a path that needs investigation. Link: https://lore.kernel.org/r/171711746953.1628941.4692125082286867825.stgit@dwillia2-xfh.jf.intel.com Link: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_134186v1/shard-dg2-1/igt@device_reset@unbind-reset-rebind.html [1] Link: http://lore.kernel.org/r/cfb50601-5d2a-4676-a958-1bd3f1b06654@intel.com [2] Signed-off-by: Dan Williams Signed-off-by: Bjorn Helgaas Tested-by: Hans de Goede Tested-by: Kalle Valo Reviewed-by: Dave Jiang --- drivers/pci/pci.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 59e0949fb079..4821c22eca1f 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -4884,6 +4884,9 @@ void __weak pcibios_reset_secondary_bus(struct pci_dev *dev) int pci_bridge_secondary_bus_reset(struct pci_dev *dev) { lock_map_assert_held(&dev->cfg_access_lock); + if (!dev->block_cfg_access) + pci_warn_once(dev, "unlocked secondary bus reset via: %pS\n", + __builtin_return_address(0)); pcibios_reset_secondary_bus(dev); return pci_bridge_wait_for_secondary_bus(dev, "bus reset"); -- cgit v1.2.3 From a4e772898f8bf2e7e1cf661a12c60a5612c4afab Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 30 May 2024 18:04:35 -0700 Subject: PCI: Add missing bridge lock to pci_bus_lock() One of the true positives that the cfg_access_lock lockdep effort identified is this sequence: WARNING: CPU: 14 PID: 1 at drivers/pci/pci.c:4886 pci_bridge_secondary_bus_reset+0x5d/0x70 RIP: 0010:pci_bridge_secondary_bus_reset+0x5d/0x70 Call Trace: ? __warn+0x8c/0x190 ? pci_bridge_secondary_bus_reset+0x5d/0x70 ? report_bug+0x1f8/0x200 ? handle_bug+0x3c/0x70 ? exc_invalid_op+0x18/0x70 ? asm_exc_invalid_op+0x1a/0x20 ? pci_bridge_secondary_bus_reset+0x5d/0x70 pci_reset_bus+0x1d8/0x270 vmd_probe+0x778/0xa10 pci_device_probe+0x95/0x120 Where pci_reset_bus() users are triggering unlocked secondary bus resets. Ironically pci_bus_reset(), several calls down from pci_reset_bus(), uses pci_bus_lock() before issuing the reset which locks everything *but* the bridge itself. For the same motivation as adding: bridge = pci_upstream_bridge(dev); if (bridge) pci_dev_lock(bridge); to pci_reset_function() for the "bus" and "cxl_bus" reset cases, add pci_dev_lock() for @bus->self to pci_bus_lock(). Link: https://lore.kernel.org/r/171711747501.1628941.15217746952476635316.stgit@dwillia2-xfh.jf.intel.com Reported-by: Imre Deak Closes: http://lore.kernel.org/r/6657833b3b5ae_14984b29437@dwillia2-xfh.jf.intel.com.notmuch Signed-off-by: Dan Williams Signed-off-by: Keith Busch [bhelgaas: squash in recursive locking deadlock fix from Keith Busch: https://lore.kernel.org/r/20240711193650.701834-1-kbusch@meta.com] Signed-off-by: Bjorn Helgaas Tested-by: Hans de Goede Tested-by: Kalle Valo Reviewed-by: Dave Jiang --- drivers/pci/pci.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 4821c22eca1f..43ce99a5dc22 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -5445,10 +5445,12 @@ static void pci_bus_lock(struct pci_bus *bus) { struct pci_dev *dev; + pci_dev_lock(bus->self); list_for_each_entry(dev, &bus->devices, bus_list) { - pci_dev_lock(dev); if (dev->subordinate) pci_bus_lock(dev->subordinate); + else + pci_dev_lock(dev); } } @@ -5460,8 +5462,10 @@ static void pci_bus_unlock(struct pci_bus *bus) list_for_each_entry(dev, &bus->devices, bus_list) { if (dev->subordinate) pci_bus_unlock(dev->subordinate); - pci_dev_unlock(dev); + else + pci_dev_unlock(dev); } + pci_dev_unlock(bus->self); } /* Return 1 on successful lock, 0 on contention */ @@ -5469,15 +5473,15 @@ static int pci_bus_trylock(struct pci_bus *bus) { struct pci_dev *dev; + if (!pci_dev_trylock(bus->self)) + return 0; + list_for_each_entry(dev, &bus->devices, bus_list) { - if (!pci_dev_trylock(dev)) - goto unlock; if (dev->subordinate) { - if (!pci_bus_trylock(dev->subordinate)) { - pci_dev_unlock(dev); + if (!pci_bus_trylock(dev->subordinate)) goto unlock; - } - } + } else if (!pci_dev_trylock(dev)) + goto unlock; } return 1; @@ -5485,8 +5489,10 @@ unlock: list_for_each_entry_continue_reverse(dev, &bus->devices, bus_list) { if (dev->subordinate) pci_bus_unlock(dev->subordinate); - pci_dev_unlock(dev); + else + pci_dev_unlock(dev); } + pci_dev_unlock(bus->self); return 0; } @@ -5518,9 +5524,10 @@ static void pci_slot_lock(struct pci_slot *slot) list_for_each_entry(dev, &slot->bus->devices, bus_list) { if (!dev->slot || dev->slot != slot) continue; - pci_dev_lock(dev); if (dev->subordinate) pci_bus_lock(dev->subordinate); + else + pci_dev_lock(dev); } } @@ -5546,14 +5553,13 @@ static int pci_slot_trylock(struct pci_slot *slot) list_for_each_entry(dev, &slot->bus->devices, bus_list) { if (!dev->slot || dev->slot != slot) continue; - if (!pci_dev_trylock(dev)) - goto unlock; if (dev->subordinate) { if (!pci_bus_trylock(dev->subordinate)) { pci_dev_unlock(dev); goto unlock; } - } + } else if (!pci_dev_trylock(dev)) + goto unlock; } return 1; @@ -5564,7 +5570,8 @@ unlock: continue; if (dev->subordinate) pci_bus_unlock(dev->subordinate); - pci_dev_unlock(dev); + else + pci_dev_unlock(dev); } return 0; } -- cgit v1.2.3