diff options
author | Thinh Tran <thinhtr@linux.vnet.ibm.com> | 2023-08-18 11:14:40 -0500 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2023-08-22 17:07:40 -0700 |
commit | bf23ffc8a9a777dfdeb04232e0946b803adbb6a9 (patch) | |
tree | 6da9fccef79ecdf55c7803dc2d8a0614c96389e6 | |
parent | 6dc5774deefe38d9ab385a5dafbe6614ae63d166 (diff) | |
download | lwn-bf23ffc8a9a777dfdeb04232e0946b803adbb6a9.tar.gz lwn-bf23ffc8a9a777dfdeb04232e0946b803adbb6a9.zip |
bnx2x: new flag to track HW resource allocation
While injecting PCIe errors to the upstream PCIe switch of
a BCM57810 NIC, system hangs/crashes were observed.
After several calls to bnx2x_tx_timeout() complete,
bnx2x_nic_unload() is called to free up HW resources
and bnx2x_napi_disable() is called to release NAPI objects.
Later, when the EEH driver calls bnx2x_io_slot_reset() to
complete the recovery process, bnx2x attempts to disable
NAPI again by calling bnx2x_napi_disable() and freeing
resources which have already been freed, resulting in a
hang or crash.
Introduce a new flag to track the HW resource and NAPI
allocation state, refactor duplicated code into a single
function, check page pool allocation status before freeing,
and reduce debug output when a TX timeout event occurs.
Reviewed-by: Manish Chopra <manishc@marvell.com>
Tested-by: Abdul Haleem <abdhalee@in.ibm.com>
Tested-by: David Christensen <drc@linux.vnet.ibm.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Tested-by: Venkata Sai Duggi <venkata.sai.duggi@ibm.com>
Signed-off-by: Thinh Tran <thinhtr@linux.vnet.ibm.com>
Link: https://lore.kernel.org/r/20230818161443.708785-2-thinhtr@linux.vnet.ibm.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r-- | drivers/net/ethernet/broadcom/bnx2x/bnx2x.h | 2 | ||||
-rw-r--r-- | drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 21 | ||||
-rw-r--r-- | drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 32 | ||||
-rw-r--r-- | drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c | 17 |
4 files changed, 44 insertions, 28 deletions
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index 8bcde0a6e011..e2a4e1088b7f 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -1508,6 +1508,8 @@ struct bnx2x { bool cnic_loaded; struct cnic_eth_dev *(*cnic_probe)(struct net_device *); + bool nic_stopped; + /* Flag that indicates that we can start looking for FCoE L2 queue * completions in the default status block. */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 6ea5521074d3..e9c1e1bb5580 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -2715,6 +2715,7 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode) bnx2x_add_all_napi(bp); DP(NETIF_MSG_IFUP, "napi added\n"); bnx2x_napi_enable(bp); + bp->nic_stopped = false; if (IS_PF(bp)) { /* set pf load just before approaching the MCP */ @@ -2960,6 +2961,7 @@ load_error2: load_error1: bnx2x_napi_disable(bp); bnx2x_del_all_napi(bp); + bp->nic_stopped = true; /* clear pf_load status, as it was already set */ if (IS_PF(bp)) @@ -3095,14 +3097,17 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link) if (!CHIP_IS_E1x(bp)) bnx2x_pf_disable(bp); - /* Disable HW interrupts, NAPI */ - bnx2x_netif_stop(bp, 1); - /* Delete all NAPI objects */ - bnx2x_del_all_napi(bp); - if (CNIC_LOADED(bp)) - bnx2x_del_all_napi_cnic(bp); - /* Release IRQs */ - bnx2x_free_irq(bp); + if (!bp->nic_stopped) { + /* Disable HW interrupts, NAPI */ + bnx2x_netif_stop(bp, 1); + /* Delete all NAPI objects */ + bnx2x_del_all_napi(bp); + if (CNIC_LOADED(bp)) + bnx2x_del_all_napi_cnic(bp); + /* Release IRQs */ + bnx2x_free_irq(bp); + bp->nic_stopped = true; + } /* Report UNLOAD_DONE to MCP */ bnx2x_send_unload_done(bp, false); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c 
index 1e7a6f1d4223..0d8e61c63c7c 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -9474,15 +9474,18 @@ unload_error: } } - /* Disable HW interrupts, NAPI */ - bnx2x_netif_stop(bp, 1); - /* Delete all NAPI objects */ - bnx2x_del_all_napi(bp); - if (CNIC_LOADED(bp)) - bnx2x_del_all_napi_cnic(bp); + if (!bp->nic_stopped) { + /* Disable HW interrupts, NAPI */ + bnx2x_netif_stop(bp, 1); + /* Delete all NAPI objects */ + bnx2x_del_all_napi(bp); + if (CNIC_LOADED(bp)) + bnx2x_del_all_napi_cnic(bp); - /* Release IRQs */ - bnx2x_free_irq(bp); + /* Release IRQs */ + bnx2x_free_irq(bp); + bp->nic_stopped = true; + } /* Reset the chip, unless PCI function is offline. If we reach this * point following a PCI error handling, it means device is really @@ -14238,13 +14241,16 @@ static pci_ers_result_t bnx2x_io_slot_reset(struct pci_dev *pdev) } bnx2x_drain_tx_queues(bp); bnx2x_send_unload_req(bp, UNLOAD_RECOVERY); - bnx2x_netif_stop(bp, 1); - bnx2x_del_all_napi(bp); + if (!bp->nic_stopped) { + bnx2x_netif_stop(bp, 1); + bnx2x_del_all_napi(bp); - if (CNIC_LOADED(bp)) - bnx2x_del_all_napi_cnic(bp); + if (CNIC_LOADED(bp)) + bnx2x_del_all_napi_cnic(bp); - bnx2x_free_irq(bp); + bnx2x_free_irq(bp); + bp->nic_stopped = true; + } /* Report UNLOAD_DONE to MCP */ bnx2x_send_unload_done(bp, true); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c index 0657a0f5170f..8946a931e87e 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c @@ -529,13 +529,16 @@ void bnx2x_vfpf_close_vf(struct bnx2x *bp) bnx2x_vfpf_finalize(bp, &req->first_tlv); free_irq: - /* Disable HW interrupts, NAPI */ - bnx2x_netif_stop(bp, 0); - /* Delete all NAPI objects */ - bnx2x_del_all_napi(bp); - - /* Release IRQs */ - bnx2x_free_irq(bp); + if (!bp->nic_stopped) { + /* Disable HW interrupts, NAPI */ + bnx2x_netif_stop(bp, 0); + 
/* Delete all NAPI objects */ + bnx2x_del_all_napi(bp); + + /* Release IRQs */ + bnx2x_free_irq(bp); + bp->nic_stopped = true; + } } static void bnx2x_leading_vfq_init(struct bnx2x *bp, struct bnx2x_virtf *vf, |