diff options
author | Shivasharan S <shivasharan.srikanteshwara@broadcom.com> | 2018-10-16 23:37:39 -0700 |
---|---|---|
committer | Martin K. Petersen <martin.petersen@oracle.com> | 2018-11-06 20:33:56 -0500 |
commit | 3f6194af539464d83b29ed347aceddb336a3625c (patch) | |
tree | a9e91aeb4437552b78ffa8715332b5420ab70cbb /drivers/scsi/megaraid/megaraid_sas_base.c | |
parent | 8dbb748d4d1b6731f12dbdb855ffe320cfe2cb2b (diff) | |
download | lwn-3f6194af539464d83b29ed347aceddb336a3625c.tar.gz lwn-3f6194af539464d83b29ed347aceddb336a3625c.zip |
scsi: megaraid_sas: Add watchdog thread to detect Firmware fault
Currently driver checks for Firmware state change from ISR context, and
only when there are interrupts tied with no I/O completions. We have seen
multiple cases where doorbell interrupts sent by firmware to indicate FW
state change are not processed by driver and it takes long time for driver
to trigger OCR. And if there are no IOs running, since we only check the FW
state as part of ISR code, fault goes undetected by driver and OCR will not
be triggered.
This patch introduces a separate workqueue that runs every one second to
detect Firmware FAULT state and trigger reset immediately. As an
additional gain, removing PCI reads from ISR to check FW state results in
improved performance as well.
Signed-off-by: Sumit Saxena <sumit.saxena@broadcom.com>
Signed-off-by: Shivasharan S <shivasharan.srikanteshwara@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Diffstat (limited to 'drivers/scsi/megaraid/megaraid_sas_base.c')
-rw-r--r-- | drivers/scsi/megaraid/megaraid_sas_base.c | 34 |
1 files changed, 30 insertions, 4 deletions
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 9b90c716f06d..4dc29e055461 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -5582,8 +5582,20 @@ static int megasas_init_fw(struct megasas_instance *instance) instance->skip_heartbeat_timer_del = 1; } + /* + * Create and start watchdog thread which will monitor + * controller state every 1 sec and trigger OCR when + * it enters fault state + */ + if (instance->adapter_type != MFI_SERIES) + if (megasas_fusion_start_watchdog(instance) != SUCCESS) + goto fail_start_watchdog; + return 0; +fail_start_watchdog: + if (instance->requestorId && !instance->skip_heartbeat_timer_del) + del_timer_sync(&instance->sriov_heartbeat_timer); fail_get_ld_pd_list: instance->instancet->disable_intr(instance); fail_init_adapter: @@ -6434,12 +6446,10 @@ static inline void megasas_init_ctrl_params(struct megasas_instance *instance) instance->disableOnlineCtrlReset = 1; instance->UnevenSpanSupport = 0; - if (instance->adapter_type != MFI_SERIES) { + if (instance->adapter_type != MFI_SERIES) INIT_WORK(&instance->work_init, megasas_fusion_ocr_wq); - INIT_WORK(&instance->crash_init, megasas_fusion_crash_dump_wq); - } else { + else INIT_WORK(&instance->work_init, process_fw_state_change_wq); - } } /** @@ -6708,6 +6718,10 @@ megasas_suspend(struct pci_dev *pdev, pm_message_t state) if (instance->requestorId && !instance->skip_heartbeat_timer_del) del_timer_sync(&instance->sriov_heartbeat_timer); + /* Stop the FW fault detection watchdog */ + if (instance->adapter_type != MFI_SERIES) + megasas_fusion_stop_watchdog(instance); + megasas_flush_cache(instance); megasas_shutdown_controller(instance, MR_DCMD_HIBERNATE_SHUTDOWN); @@ -6843,8 +6857,16 @@ megasas_resume(struct pci_dev *pdev) if (megasas_start_aen(instance)) dev_err(&instance->pdev->dev, "Start AEN failed\n"); + /* Re-launch FW fault watchdog */ + if (instance->adapter_type != MFI_SERIES) + if (megasas_fusion_start_watchdog(instance) != SUCCESS) + goto fail_start_watchdog; + return 0; +fail_start_watchdog: + if (instance->requestorId && !instance->skip_heartbeat_timer_del) + del_timer_sync(&instance->sriov_heartbeat_timer); fail_init_mfi: megasas_free_ctrl_dma_buffers(instance); megasas_free_ctrl_mem(instance); @@ -6912,6 +6934,10 @@ static void megasas_detach_one(struct pci_dev *pdev) if (instance->requestorId && !instance->skip_heartbeat_timer_del) del_timer_sync(&instance->sriov_heartbeat_timer); + /* Stop the FW fault detection watchdog */ + if (instance->adapter_type != MFI_SERIES) + megasas_fusion_stop_watchdog(instance); + if (instance->fw_crash_state != UNAVAILABLE) megasas_free_host_crash_buffer(instance); scsi_remove_host(instance->host); |