summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorManish Rangankar <mrangankar@marvell.com>2020-09-08 02:56:56 -0700
committerMartin K. Petersen <martin.petersen@oracle.com>2020-09-08 22:40:25 -0400
commitf4ba4e55db6db7e85e847f1a5891efa792580809 (patch)
tree40faf3bb8256457f00909e5fcf514aababb1b248
parent4118879be3755b38171063dfd4a57611d4b20a83 (diff)
downloadlwn-f4ba4e55db6db7e85e847f1a5891efa792580809.tar.gz
lwn-f4ba4e55db6db7e85e847f1a5891efa792580809.zip
scsi: qedi: Add firmware error recovery invocation support
Add support to initiate MFW process recovery for all the devices if storage function receives the event first. Also added fix for kernel test robot warning, >> drivers/scsi/qedi/qedi_main.c:1119:6: warning: no previous prototype >> for 'qedi_schedule_hw_err_handler' [-Wmissing-prototypes] Link: https://lore.kernel.org/r/20200908095657.26821-8-mrangankar@marvell.com Reported-by: kernel test robot <lkp@intel.com> Signed-off-by: Manish Rangankar <mrangankar@marvell.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
-rw-r--r--drivers/scsi/qedi/qedi.h4
-rw-r--r--drivers/scsi/qedi/qedi_fw.c7
-rw-r--r--drivers/scsi/qedi/qedi_iscsi.c3
-rw-r--r--drivers/scsi/qedi/qedi_main.c63
4 files changed, 73 insertions, 4 deletions
diff --git a/drivers/scsi/qedi/qedi.h b/drivers/scsi/qedi/qedi.h
index 9c19ec9dc682..7e59d50f2fab 100644
--- a/drivers/scsi/qedi/qedi.h
+++ b/drivers/scsi/qedi/qedi.h
@@ -274,6 +274,10 @@ struct qedi_ctx {
spinlock_t ll2_lock; /* Light L2 lock */
spinlock_t hba_lock; /* per port lock */
struct task_struct *ll2_recv_thread;
+ unsigned long qedi_err_flags;
+#define QEDI_ERR_ATTN_CLR_EN 0
+#define QEDI_ERR_IS_RECOVERABLE 2
+#define QEDI_ERR_OVERRIDE_EN 31
unsigned long flags;
#define UIO_DEV_OPENED 1
#define QEDI_IOTHREAD_WAKE 2
diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c
index f158fde0a43c..440ddd2309f1 100644
--- a/drivers/scsi/qedi/qedi_fw.c
+++ b/drivers/scsi/qedi/qedi_fw.c
@@ -1267,7 +1267,8 @@ int qedi_cleanup_all_io(struct qedi_ctx *qedi, struct qedi_conn *qedi_conn,
rval = wait_event_interruptible_timeout(qedi_conn->wait_queue,
((qedi_conn->cmd_cleanup_req ==
qedi_conn->cmd_cleanup_cmpl) ||
- qedi_conn->ep),
+ test_bit(QEDI_IN_RECOVERY,
+ &qedi->flags)),
5 * HZ);
if (rval) {
QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM,
@@ -1292,7 +1293,9 @@ int qedi_cleanup_all_io(struct qedi_ctx *qedi, struct qedi_conn *qedi_conn,
/* Enable IOs for all other sessions except current.*/
if (!wait_event_interruptible_timeout(qedi_conn->wait_queue,
(qedi_conn->cmd_cleanup_req ==
- qedi_conn->cmd_cleanup_cmpl),
+ qedi_conn->cmd_cleanup_cmpl) ||
+ test_bit(QEDI_IN_RECOVERY,
+ &qedi->flags),
5 * HZ)) {
iscsi_host_for_each_session(qedi->shost,
qedi_mark_device_available);
diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c
index ae86a40ca040..08c05403cd72 100644
--- a/drivers/scsi/qedi/qedi_iscsi.c
+++ b/drivers/scsi/qedi/qedi_iscsi.c
@@ -1072,7 +1072,8 @@ static void qedi_ep_disconnect(struct iscsi_endpoint *ep)
qedi_ep->state = EP_STATE_DISCONN_START;
- if (test_bit(QEDI_IN_SHUTDOWN, &qedi->flags))
+ if (test_bit(QEDI_IN_SHUTDOWN, &qedi->flags) ||
+ test_bit(QEDI_IN_RECOVERY, &qedi->flags))
goto ep_release_conn;
ret = qedi_ops->destroy_conn(qedi->cdev, qedi_ep->handle, abrt_conn);
diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c
index 7186e814866e..8fde35f843f7 100644
--- a/drivers/scsi/qedi/qedi_main.c
+++ b/drivers/scsi/qedi/qedi_main.c
@@ -50,6 +50,10 @@ module_param(qedi_ll2_buf_size, uint, 0644);
MODULE_PARM_DESC(qedi_ll2_buf_size,
"parameter to set ping packet size, default - 0x400, Jumbo packets - 0x2400.");
+static uint qedi_flags_override;
+module_param(qedi_flags_override, uint, 0644);
+MODULE_PARM_DESC(qedi_flags_override, "Disable/Enable MFW error flags bits action.");
+
const struct qed_iscsi_ops *qedi_ops;
static struct scsi_transport_template *qedi_scsi_transport;
static struct pci_driver qedi_pci_driver;
@@ -63,6 +67,8 @@ static void qedi_reset_uio_rings(struct qedi_uio_dev *udev);
static void qedi_ll2_free_skbs(struct qedi_ctx *qedi);
static struct nvm_iscsi_block *qedi_get_nvram_block(struct qedi_ctx *qedi);
static void qedi_recovery_handler(struct work_struct *work);
+static void qedi_schedule_hw_err_handler(void *dev,
+ enum qed_hw_err_type err_type);
static int qedi_iscsi_event_cb(void *context, u8 fw_event_code, void *fw_handle)
{
@@ -1112,6 +1118,39 @@ exit_get_data:
return;
}
+void qedi_schedule_hw_err_handler(void *dev,
+ enum qed_hw_err_type err_type)
+{
+ struct qedi_ctx *qedi = (struct qedi_ctx *)dev;
+ unsigned long override_flags = qedi_flags_override;
+
+ if (override_flags && test_bit(QEDI_ERR_OVERRIDE_EN, &override_flags))
+ qedi->qedi_err_flags = qedi_flags_override;
+
+ QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO,
+ "HW error handler scheduled, err=%d err_flags=0x%x\n",
+ err_type, qedi->qedi_err_flags);
+
+ switch (err_type) {
+ case QED_HW_ERR_MFW_RESP_FAIL:
+ case QED_HW_ERR_HW_ATTN:
+ case QED_HW_ERR_DMAE_FAIL:
+ case QED_HW_ERR_RAMROD_FAIL:
+ case QED_HW_ERR_FW_ASSERT:
+ /* Prevent HW attentions from being reasserted */
+ if (test_bit(QEDI_ERR_ATTN_CLR_EN, &qedi->qedi_err_flags))
+ qedi_ops->common->attn_clr_enable(qedi->cdev, true);
+
+ if (err_type == QED_HW_ERR_RAMROD_FAIL &&
+ test_bit(QEDI_ERR_IS_RECOVERABLE, &qedi->qedi_err_flags))
+ qedi_ops->common->recovery_process(qedi->cdev);
+
+ break;
+ default:
+ break;
+ }
+}
+
static void qedi_schedule_recovery_handler(void *dev)
{
struct qedi_ctx *qedi = dev;
@@ -1154,6 +1193,7 @@ static struct qed_iscsi_cb_ops qedi_cb_ops = {
{
.link_update = qedi_link_update,
.schedule_recovery_handler = qedi_schedule_recovery_handler,
+ .schedule_hw_err_handler = qedi_schedule_hw_err_handler,
.get_protocol_tlv_data = qedi_get_protocol_tlv_data,
.get_generic_tlv_data = qedi_get_generic_tlv_data,
}
@@ -2354,6 +2394,7 @@ static void __qedi_remove(struct pci_dev *pdev, int mode)
{
struct qedi_ctx *qedi = pci_get_drvdata(pdev);
int rval;
+ u16 retry = 10;
if (mode == QEDI_MODE_SHUTDOWN)
iscsi_host_for_each_session(qedi->shost,
@@ -2382,7 +2423,13 @@ static void __qedi_remove(struct pci_dev *pdev, int mode)
qedi_sync_free_irqs(qedi);
if (!test_bit(QEDI_IN_OFFLINE, &qedi->flags)) {
- qedi_ops->stop(qedi->cdev);
+ while (retry--) {
+ rval = qedi_ops->stop(qedi->cdev);
+ if (rval < 0)
+ msleep(1000);
+ else
+ break;
+ }
qedi_ops->ll2->stop(qedi->cdev);
}
@@ -2441,6 +2488,7 @@ static int __qedi_probe(struct pci_dev *pdev, int mode)
struct qed_probe_params qed_params;
void *task_start, *task_end;
int rc;
+ u16 retry = 10;
if (mode != QEDI_MODE_RECOVERY) {
qedi = qedi_host_alloc(pdev);
@@ -2452,6 +2500,10 @@ static int __qedi_probe(struct pci_dev *pdev, int mode)
qedi = pci_get_drvdata(pdev);
}
+retry_probe:
+ if (mode == QEDI_MODE_RECOVERY)
+ msleep(2000);
+
memset(&qed_params, 0, sizeof(qed_params));
qed_params.protocol = QED_PROTOCOL_ISCSI;
qed_params.dp_module = qedi_qed_debug;
@@ -2459,11 +2511,20 @@ static int __qedi_probe(struct pci_dev *pdev, int mode)
qed_params.is_vf = is_vf;
qedi->cdev = qedi_ops->common->probe(pdev, &qed_params);
if (!qedi->cdev) {
+ if (mode == QEDI_MODE_RECOVERY && retry) {
+ QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO,
+ "Retry %d initialize hardware\n", retry);
+ retry--;
+ goto retry_probe;
+ }
+
rc = -ENODEV;
QEDI_ERR(&qedi->dbg_ctx, "Cannot initialize hardware\n");
goto free_host;
}
+ set_bit(QEDI_ERR_ATTN_CLR_EN, &qedi->qedi_err_flags);
+ set_bit(QEDI_ERR_IS_RECOVERABLE, &qedi->qedi_err_flags);
atomic_set(&qedi->link_state, QEDI_LINK_DOWN);
rc = qedi_ops->fill_dev_info(qedi->cdev, &qedi->dev_info);