summary | refs | log | tree | commit | diff
path: root/drivers/scsi/megaraid/megaraid_sas_base.c
diff options
context:
space:
mode:
author: Shivasharan S <shivasharan.srikanteshwara@broadcom.com> 2018-10-16 23:37:39 -0700
committer: Martin K. Petersen <martin.petersen@oracle.com> 2018-11-06 20:33:56 -0500
commit: 3f6194af539464d83b29ed347aceddb336a3625c (patch)
tree: a9e91aeb4437552b78ffa8715332b5420ab70cbb /drivers/scsi/megaraid/megaraid_sas_base.c
parent: 8dbb748d4d1b6731f12dbdb855ffe320cfe2cb2b (diff)
download: lwn-3f6194af539464d83b29ed347aceddb336a3625c.tar.gz
          lwn-3f6194af539464d83b29ed347aceddb336a3625c.zip
scsi: megaraid_sas: Add watchdog thread to detect Firmware fault
Currently the driver checks for a firmware state change only from ISR context, and only when an interrupt arrives that is not tied to any I/O completion. We have seen multiple cases where the doorbell interrupts sent by the firmware to indicate a FW state change are not processed by the driver, so it takes a long time for the driver to trigger OCR (online controller reset). Moreover, if no I/Os are running, the fault goes undetected and OCR is never triggered, because the FW state is checked only as part of the ISR code.

This patch introduces a separate workqueue that runs every second to detect the firmware FAULT state and trigger a reset immediately. As an additional gain, removing the PCI reads that the ISR used to check the FW state improves performance as well.

Signed-off-by: Sumit Saxena <sumit.saxena@broadcom.com>
Signed-off-by: Shivasharan S <shivasharan.srikanteshwara@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Diffstat (limited to 'drivers/scsi/megaraid/megaraid_sas_base.c')
-rw-r--r-- drivers/scsi/megaraid/megaraid_sas_base.c 34
1 files changed, 30 insertions, 4 deletions
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index 9b90c716f06d..4dc29e055461 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -5582,8 +5582,20 @@ static int megasas_init_fw(struct megasas_instance *instance)
instance->skip_heartbeat_timer_del = 1;
}
+ /*
+ * Create and start watchdog thread which will monitor
+ * controller state every 1 sec and trigger OCR when
+ * it enters fault state
+ */
+ if (instance->adapter_type != MFI_SERIES)
+ if (megasas_fusion_start_watchdog(instance) != SUCCESS)
+ goto fail_start_watchdog;
+
return 0;
+fail_start_watchdog:
+ if (instance->requestorId && !instance->skip_heartbeat_timer_del)
+ del_timer_sync(&instance->sriov_heartbeat_timer);
fail_get_ld_pd_list:
instance->instancet->disable_intr(instance);
fail_init_adapter:
@@ -6434,12 +6446,10 @@ static inline void megasas_init_ctrl_params(struct megasas_instance *instance)
instance->disableOnlineCtrlReset = 1;
instance->UnevenSpanSupport = 0;
- if (instance->adapter_type != MFI_SERIES) {
+ if (instance->adapter_type != MFI_SERIES)
INIT_WORK(&instance->work_init, megasas_fusion_ocr_wq);
- INIT_WORK(&instance->crash_init, megasas_fusion_crash_dump_wq);
- } else {
+ else
INIT_WORK(&instance->work_init, process_fw_state_change_wq);
- }
}
/**
@@ -6708,6 +6718,10 @@ megasas_suspend(struct pci_dev *pdev, pm_message_t state)
if (instance->requestorId && !instance->skip_heartbeat_timer_del)
del_timer_sync(&instance->sriov_heartbeat_timer);
+ /* Stop the FW fault detection watchdog */
+ if (instance->adapter_type != MFI_SERIES)
+ megasas_fusion_stop_watchdog(instance);
+
megasas_flush_cache(instance);
megasas_shutdown_controller(instance, MR_DCMD_HIBERNATE_SHUTDOWN);
@@ -6843,8 +6857,16 @@ megasas_resume(struct pci_dev *pdev)
if (megasas_start_aen(instance))
dev_err(&instance->pdev->dev, "Start AEN failed\n");
+ /* Re-launch FW fault watchdog */
+ if (instance->adapter_type != MFI_SERIES)
+ if (megasas_fusion_start_watchdog(instance) != SUCCESS)
+ goto fail_start_watchdog;
+
return 0;
+fail_start_watchdog:
+ if (instance->requestorId && !instance->skip_heartbeat_timer_del)
+ del_timer_sync(&instance->sriov_heartbeat_timer);
fail_init_mfi:
megasas_free_ctrl_dma_buffers(instance);
megasas_free_ctrl_mem(instance);
@@ -6912,6 +6934,10 @@ static void megasas_detach_one(struct pci_dev *pdev)
if (instance->requestorId && !instance->skip_heartbeat_timer_del)
del_timer_sync(&instance->sriov_heartbeat_timer);
+ /* Stop the FW fault detection watchdog */
+ if (instance->adapter_type != MFI_SERIES)
+ megasas_fusion_stop_watchdog(instance);
+
if (instance->fw_crash_state != UNAVAILABLE)
megasas_free_host_crash_buffer(instance);
scsi_remove_host(instance->host);