From c2fe742ff6e77c5b4fe4ad273191ddf28fdea25e Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Mon, 4 Mar 2019 07:26:35 -0500 Subject: scsi: mpt3sas: Fix kernel panic during expander reset During expander reset handling, the driver invokes kernel function scsi_host_find_tag() to obtain outstanding requests associated with the scsi host managed by the driver. Driver loops from tag value zero to hba queue depth to obtain the outstanding scmds. But when blk-mq is enabled, the block layer may return stale entry for one or more requests. This may lead to kernel panic if the returned value is inaccessible or the memory pointed by the returned value is reused. Reference of upstream discussion: https://patchwork.kernel.org/patch/10734933/ Instead of calling scsi_host_find_tag() API for each and every smid (smid is tag +1) from one to shost->can_queue, now driver will call this API (to obtain the outstanding scmd) only for those smid's which are outstanding at the driver level. Driver will determine whether this smid is outstanding at driver level by looking into it's corresponding MPI request frame, if its MPI request frame is empty, then it means that this smid is free and does not need to call scsi_host_find_tag() for it. By doing this, driver will invoke scsi_host_find_tag() for only those tags which are outstanding at the driver level. Driver will check whether particular MPI request frame is empty or not by looking into the "DevHandle" field. If this field is zero then it means that this MPI request is empty. For active MPI request DevHandle must be non-zero. Also driver will memset the MPI request frame once the corresponding scmd is processed (i.e. just before calling scmd->done function). Signed-off-by: Sreekanth Reddy Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.c | 6 ++++++ drivers/scsi/mpt3sas/mpt3sas_scsih.c | 12 ++++++++++++ 2 files changed, 18 insertions(+) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index e57774472e75..1d8c584ec1e9 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -3281,12 +3281,18 @@ mpt3sas_base_free_smid(struct MPT3SAS_ADAPTER *ioc, u16 smid) if (smid < ioc->hi_priority_smid) { struct scsiio_tracker *st; + void *request; st = _get_st_from_smid(ioc, smid); if (!st) { _base_recovery_check(ioc); return; } + + /* Clear MPI request frame */ + request = mpt3sas_base_get_msg_frame(ioc, smid); + memset(request, 0, ioc->request_sz); + mpt3sas_base_clear_st(ioc, st); _base_recovery_check(ioc); return; diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 8bb5b8f9f4d2..1ccfbc7eebe0 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -1462,11 +1462,23 @@ mpt3sas_scsih_scsi_lookup_get(struct MPT3SAS_ADAPTER *ioc, u16 smid) { struct scsi_cmnd *scmd = NULL; struct scsiio_tracker *st; + Mpi25SCSIIORequest_t *mpi_request; if (smid > 0 && smid <= ioc->scsiio_depth - INTERNAL_SCSIIO_CMDS_COUNT) { u32 unique_tag = smid - 1; + mpi_request = mpt3sas_base_get_msg_frame(ioc, smid); + + /* + * If SCSI IO request is outstanding at driver level then + * DevHandle filed must be non-zero. If DevHandle is zero + * then it means that this smid is free at driver level, + * so return NULL. + */ + if (!mpi_request->DevHandle) + return scmd; + scmd = scsi_host_find_tag(ioc->shost, unique_tag); if (scmd) { st = scsi_cmd_priv(scmd); -- cgit v1.2.3 From b6554cfe09e1f610aed7d57164ab7760be57acd9 Mon Sep 17 00:00:00 2001 From: Dave Carroll Date: Fri, 22 Mar 2019 12:16:03 -0600 Subject: scsi: aacraid: Insure we don't access PCIe space during AER/EEH There are a few windows during AER/EEH when we can access PCIe I/O mapped registers. This will harden the access to insure we do not allow PCIe access during errors Signed-off-by: Dave Carroll Reviewed-by: Sagar Biradar Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/aacraid.h | 7 ++++++- drivers/scsi/aacraid/commsup.c | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index 1df5171594b8..11fb68d7e60d 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -2640,9 +2640,14 @@ static inline unsigned int cap_to_cyls(sector_t capacity, unsigned divisor) return capacity; } +static inline int aac_pci_offline(struct aac_dev *dev) +{ + return pci_channel_offline(dev->pdev) || dev->handle_pci_error; +} + static inline int aac_adapter_check_health(struct aac_dev *dev) { - if (unlikely(pci_channel_offline(dev->pdev))) + if (unlikely(aac_pci_offline(dev))) return -1; return (dev)->a_ops.adapter_check_health(dev); diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index e67e032936ef..78430a7b294c 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -672,7 +672,7 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size, return -ETIMEDOUT; } - if (unlikely(pci_channel_offline(dev->pdev))) + if (unlikely(aac_pci_offline(dev))) return -EFAULT; if ((blink = aac_adapter_check_health(dev)) > 0) { @@ -772,7 +772,7 @@ int aac_hba_send(u8 command, struct fib *fibptr, fib_callback callback, spin_unlock_irqrestore(&fibptr->event_lock, flags); - if (unlikely(pci_channel_offline(dev->pdev))) + if (unlikely(aac_pci_offline(dev))) return -EFAULT; fibptr->flags |= FIB_CONTEXT_FLAG_WAIT; -- cgit v1.2.3 From fba1bdd2a9a93f3e2181ec1936a3c2f6b37e7ed6 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Thu, 14 Mar 2019 01:30:59 -0500 Subject: scsi: qla4xxx: fix a potential NULL pointer dereference In case iscsi_lookup_endpoint fails, the fix returns -EINVAL to avoid NULL pointer dereference. Signed-off-by: Kangjie Lu Acked-by: Manish Rangankar Reviewed-by: Mukesh Ojha Signed-off-by: Martin K. Petersen --- drivers/scsi/qla4xxx/ql4_os.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index 16a18d5d856f..6e4f4931ae17 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -3203,6 +3203,8 @@ static int qla4xxx_conn_bind(struct iscsi_cls_session *cls_session, if (iscsi_conn_bind(cls_session, cls_conn, is_leading)) return -EINVAL; ep = iscsi_lookup_endpoint(transport_fd); + if (!ep) + return -EINVAL; conn = cls_conn->dd_data; qla_conn = conn->dd_data; qla_conn->qla_ep = ep->dd_data; -- cgit v1.2.3 From 70fc085c5015c54a7b8742a45fc9ab05d6da90da Mon Sep 17 00:00:00 2001 From: zhengbin Date: Fri, 22 Mar 2019 10:56:46 +0800 Subject: scsi: core: Run queue when state is set to running after being blocked Use dd to test a SCSI device: 1. echo "blocked" >/sys/block/sda/device/state 2. dd if=/dev/sda of=/mnt/t.log bs=1M count=10 3. echo "running" >/sys/block/sda/device/state dd should finish this work after step 3, but it hangs. After step2, the call chain is this: blk_mq_dispatch_rq_list-->scsi_queue_rq-->prep_to_mq prep_to_mq will return BLK_STS_RESOURCE, and scsi_queue_rq will transition it to BLK_STS_DEV_RESOURCE which means that driver can guarantee that IO dispatch will be triggered in future when the resource is available. Need to follow the rule if we set the device state to running. [mkp: tweaked commit description and code comment as suggested by Bart] Signed-off-by: zhengbin Reviewed-by: Ming Lei Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_sysfs.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 6a9040faed00..3b119ca0cc0c 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -771,6 +771,12 @@ store_state_field(struct device *dev, struct device_attribute *attr, mutex_lock(&sdev->state_mutex); ret = scsi_device_set_state(sdev, state); + /* + * If the device state changes to SDEV_RUNNING, we need to run + * the queue to avoid I/O hang. + */ + if (ret == 0 && state == SDEV_RUNNING) + blk_mq_run_hw_queues(sdev->request_queue, true); mutex_unlock(&sdev->state_mutex); return ret == 0 ? count : -EINVAL; -- cgit v1.2.3 From c14a57264399efd39514a2329c591a4b954246d8 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 25 Mar 2019 10:01:46 -0700 Subject: scsi: sd: Fix a race between closing an sd device and sd I/O The scsi_end_request() function calls scsi_cmd_to_driver() indirectly and hence needs the disk->private_data pointer. Avoid that that pointer is cleared before all affected I/O requests have finished. This patch avoids that the following crash occurs: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 Call trace: scsi_mq_uninit_cmd+0x1c/0x30 scsi_end_request+0x7c/0x1b8 scsi_io_completion+0x464/0x668 scsi_finish_command+0xbc/0x160 scsi_eh_flush_done_q+0x10c/0x170 sas_scsi_recover_host+0x84c/0xa98 [libsas] scsi_error_handler+0x140/0x5b0 kthread+0x100/0x12c ret_from_fork+0x10/0x18 Cc: Christoph Hellwig Cc: Ming Lei Cc: Hannes Reinecke Cc: Johannes Thumshirn Cc: Jason Yan Cc: Signed-off-by: Bart Van Assche Reported-by: Jason Yan Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 251db30d0882..3ffa34dc2fd5 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1415,11 +1415,6 @@ static void sd_release(struct gendisk *disk, fmode_t mode) scsi_set_medium_removal(sdev, SCSI_REMOVAL_ALLOW); } - /* - * XXX and what if there are packets in flight and this close() - * XXX is followed by a "rmmod sd_mod"? - */ - scsi_disk_put(sdkp); } @@ -3505,9 +3500,21 @@ static void scsi_disk_release(struct device *dev) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct gendisk *disk = sdkp->disk; - + struct request_queue *q = disk->queue; + ida_free(&sd_index_ida, sdkp->index); + /* + * Wait until all requests that are in progress have completed. + * This is necessary to avoid that e.g. scsi_end_request() crashes + * due to clearing the disk->private_data pointer. Wait from inside + * scsi_disk_release() instead of from sd_release() to avoid that + * freezing and unfreezing the request queue affects user space I/O + * in case multiple processes open a /dev/sd... node concurrently. + */ + blk_mq_freeze_queue(q); + blk_mq_unfreeze_queue(q); + disk->private_data = NULL; put_disk(disk); put_device(&sdkp->device->sdev_gendev); -- cgit v1.2.3 From 1d5de5bd311be7cd54f02f7cd164f0349a75c876 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 27 Mar 2019 12:11:52 -0400 Subject: scsi: sd: Quiesce warning if device does not report optimal I/O size Commit a83da8a4509d ("scsi: sd: Optimal I/O size should be a multiple of physical block size") split one conditional into several separate statements in an effort to provide more accurate warning messages when a device reports a nonsensical value. However, this reorganization accidentally dropped the precondition of the reported value being larger than zero. This lead to a warning getting emitted on devices that do not report an optimal I/O size at all. Remain silent if a device does not report an optimal I/O size. Fixes: a83da8a4509d ("scsi: sd: Optimal I/O size should be a multiple of physical block size") Cc: Randy Dunlap Cc: Reported-by: Hussam Al-Tayeb Tested-by: Hussam Al-Tayeb Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 3ffa34dc2fd5..2b2bc4b49d78 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3071,6 +3071,9 @@ static bool sd_validate_opt_xfer_size(struct scsi_disk *sdkp, unsigned int opt_xfer_bytes = logical_to_bytes(sdp, sdkp->opt_xfer_blocks); + if (sdkp->opt_xfer_blocks == 0) + return false; + if (sdkp->opt_xfer_blocks > dev_max) { sd_first_printk(KERN_WARNING, sdkp, "Optimal transfer size %u logical blocks " \ -- cgit v1.2.3 From fe67888fc007a76b81e37da23ce5bd8fb95890b0 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Tue, 26 Mar 2019 14:36:58 +0100 Subject: scsi: zfcp: fix rport unblock if deleted SCSI devices on Scsi_Host An already deleted SCSI device can exist on the Scsi_Host and remain there because something still holds a reference. A new SCSI device with the same H:C:T:L and FCP device, target port WWPN, and FCP LUN can be created. When we try to unblock an rport, we still find the deleted SCSI device and return early because the zfcp_scsi_dev of that SCSI device is not ZFCP_STATUS_COMMON_UNBLOCKED. Hence we miss to unblock the rport, even if the new proper SCSI device would be in good state. Therefore, skip deleted SCSI devices when iterating the sdevs of the shost. [cf. __scsi_device_lookup{_by_target}() or scsi_device_get()] The following abbreviated trace sequence can indicate such problem: Area : REC Tag : ersfs_3 LUN : 0x4045400300000000 WWPN : 0x50050763031bd327 LUN status : 0x40000000 not ZFCP_STATUS_COMMON_UNBLOCKED Ready count : n not incremented yet Running count : 0x00000000 ERP want : 0x01 ERP need : 0xc1 ZFCP_ERP_ACTION_NONE Area : REC Tag : ersfs_3 LUN : 0x4045400300000000 WWPN : 0x50050763031bd327 LUN status : 0x41000000 Ready count : n+1 Running count : 0x00000000 ERP want : 0x01 ERP need : 0x01 ... Area : REC Level : 4 only with increased trace level Tag : ertru_l LUN : 0x4045400300000000 WWPN : 0x50050763031bd327 LUN status : 0x40000000 Request ID : 0x0000000000000000 ERP status : 0x01800000 ERP step : 0x1000 ERP action : 0x01 ERP count : 0x00 NOT followed by a trace record with tag "scpaddy" for WWPN 0x50050763031bd327. Signed-off-by: Steffen Maier Fixes: 6f2ce1c6af37 ("scsi: zfcp: fix rport unblock race with LUN recovery") Cc: #2.6.32+ Reviewed-by: Jens Remus Reviewed-by: Benjamin Block Signed-off-by: Martin K. Petersen --- drivers/s390/scsi/zfcp_erp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index 744a64680d5b..c0b2348d7ce6 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -1341,6 +1341,9 @@ static void zfcp_erp_try_rport_unblock(struct zfcp_port *port) struct zfcp_scsi_dev *zsdev = sdev_to_zfcp(sdev); int lun_status; + if (sdev->sdev_state == SDEV_DEL || + sdev->sdev_state == SDEV_CANCEL) + continue; if (zsdev->port != port) continue; /* LUN under port of interest */ -- cgit v1.2.3 From 242ec1455151267fe35a0834aa9038e4c4670884 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Tue, 26 Mar 2019 14:36:59 +0100 Subject: scsi: zfcp: fix scsi_eh host reset with port_forced ERP for non-NPIV FCP devices Suppose more than one non-NPIV FCP device is active on the same channel. Send I/O to storage and have some of the pending I/O run into a SCSI command timeout, e.g. due to bit errors on the fibre. Now the error situation stops. However, we saw FCP requests continue to timeout in the channel. The abort will be successful, but the subsequent TUR fails. Scsi_eh starts. The LUN reset fails. The target reset fails. The host reset only did an FCP device recovery. However, for non-NPIV FCP devices, this does not close and reopen ports on the SAN-side if other non-NPIV FCP device(s) share the same open ports. In order to resolve the continuing FCP request timeouts, we need to explicitly close and reopen ports on the SAN-side. This was missing since the beginning of zfcp in v2.6.0 history commit ea127f975424 ("[PATCH] s390 (7/7): zfcp host adapter."). Note: The FSF requests for forced port reopen could run into FSF request timeouts due to other reasons. This would trigger an internal FCP device recovery. Pending forced port reopen recoveries would get dismissed. So some ports might not get fully reopened during this host reset handler. However, subsequent I/O would trigger the above described escalation and eventually all ports would be forced reopen to resolve any continuing FCP request timeouts due to earlier bit errors. Signed-off-by: Steffen Maier Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: #3.0+ Reviewed-by: Jens Remus Reviewed-by: Benjamin Block Signed-off-by: Martin K. Petersen --- drivers/s390/scsi/zfcp_erp.c | 14 ++++++++++++++ drivers/s390/scsi/zfcp_ext.h | 2 ++ drivers/s390/scsi/zfcp_scsi.c | 4 ++++ 3 files changed, 20 insertions(+) diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index c0b2348d7ce6..e8fc28dba8df 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -624,6 +624,20 @@ static void zfcp_erp_strategy_memwait(struct zfcp_erp_action *erp_action) add_timer(&erp_action->timer); } +void zfcp_erp_port_forced_reopen_all(struct zfcp_adapter *adapter, + int clear, char *dbftag) +{ + unsigned long flags; + struct zfcp_port *port; + + write_lock_irqsave(&adapter->erp_lock, flags); + read_lock(&adapter->port_list_lock); + list_for_each_entry(port, &adapter->port_list, list) + _zfcp_erp_port_forced_reopen(port, clear, dbftag); + read_unlock(&adapter->port_list_lock); + write_unlock_irqrestore(&adapter->erp_lock, flags); +} + static void _zfcp_erp_port_reopen_all(struct zfcp_adapter *adapter, int clear, char *dbftag) { diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h index 3fce47b0b21b..c6acca521ffe 100644 --- a/drivers/s390/scsi/zfcp_ext.h +++ b/drivers/s390/scsi/zfcp_ext.h @@ -70,6 +70,8 @@ extern void zfcp_erp_port_reopen(struct zfcp_port *port, int clear, char *dbftag); extern void zfcp_erp_port_shutdown(struct zfcp_port *, int, char *); extern void zfcp_erp_port_forced_reopen(struct zfcp_port *, int, char *); +extern void zfcp_erp_port_forced_reopen_all(struct zfcp_adapter *adapter, + int clear, char *dbftag); extern void zfcp_erp_set_lun_status(struct scsi_device *, u32); extern void zfcp_erp_clear_lun_status(struct scsi_device *, u32); extern void zfcp_erp_lun_reopen(struct scsi_device *, int, char *); diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index f4f6a07c5222..221d0dfb8493 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -368,6 +368,10 @@ static int zfcp_scsi_eh_host_reset_handler(struct scsi_cmnd *scpnt) struct zfcp_adapter *adapter = zfcp_sdev->port->adapter; int ret = SUCCESS, fc_ret; + if (!(adapter->connection_features & FSF_FEATURE_NPIV_MODE)) { + zfcp_erp_port_forced_reopen_all(adapter, 0, "schrh_p"); + zfcp_erp_wait(adapter); + } zfcp_erp_adapter_reopen(adapter, 0, "schrh_1"); zfcp_erp_wait(adapter); fc_ret = fc_block_scsi_eh(scpnt); -- cgit v1.2.3 From c8206579175c34a2546de8a74262456278a7795a Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Tue, 26 Mar 2019 14:37:00 +0100 Subject: scsi: zfcp: reduce flood of fcrscn1 trace records on multi-element RSCN If an incoming ELS of type RSCN contains more than one element, zfcp suboptimally causes repeated erp trigger NOP trace records for each previously failed port. These could be ports that went away. It loops over each RSCN element, and for each of those in an inner loop over all zfcp_ports. The trigger to recover failed ports should be just the reception of some RSCN, no matter how many elements it has. So we can loop over failed ports separately, and only then loop over each RSCN element to handle the non-failed ports. The call chain was: zfcp_fc_incoming_rscn for (i = 1; i < no_entries; i++) _zfcp_fc_incoming_rscn list_for_each_entry(port, &adapter->port_list, list) if (masked port->d_id match) zfcp_fc_test_link if (!port->d_id) zfcp_erp_port_reopen "fcrscn1" <=== In order the reduce the "flooding" of the REC trace area in such cases, we factor out handling the failed ports to be outside of the entries loop: zfcp_fc_incoming_rscn if (no_entries > 1) <=== list_for_each_entry(port, &adapter->port_list, list) <=== if (!port->d_id) zfcp_erp_port_reopen "fcrscn1" <=== for (i = 1; i < no_entries; i++) _zfcp_fc_incoming_rscn list_for_each_entry(port, &adapter->port_list, list) if (masked port->d_id match) zfcp_fc_test_link Abbreviated example trace records before this code change: Tag : fcrscn1 WWPN : 0x500507630310d327 ERP want : 0x02 ERP need : 0x02 Tag : fcrscn1 WWPN : 0x500507630310d327 ERP want : 0x02 ERP need : 0x00 NOP => superfluous trace record The last trace entry repeats if there are more than 2 RSCN elements. Signed-off-by: Steffen Maier Reviewed-by: Benjamin Block Reviewed-by: Jens Remus Signed-off-by: Martin K. Petersen --- drivers/s390/scsi/zfcp_fc.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c index db00b5e3abbe..33eddb02ee30 100644 --- a/drivers/s390/scsi/zfcp_fc.c +++ b/drivers/s390/scsi/zfcp_fc.c @@ -239,10 +239,6 @@ static void _zfcp_fc_incoming_rscn(struct zfcp_fsf_req *fsf_req, u32 range, list_for_each_entry(port, &adapter->port_list, list) { if ((port->d_id & range) == (ntoh24(page->rscn_fid) & range)) zfcp_fc_test_link(port); - if (!port->d_id) - zfcp_erp_port_reopen(port, - ZFCP_STATUS_COMMON_ERP_FAILED, - "fcrscn1"); } read_unlock_irqrestore(&adapter->port_list_lock, flags); } @@ -250,6 +246,7 @@ static void _zfcp_fc_incoming_rscn(struct zfcp_fsf_req *fsf_req, u32 range, static void zfcp_fc_incoming_rscn(struct zfcp_fsf_req *fsf_req) { struct fsf_status_read_buffer *status_buffer = (void *)fsf_req->data; + struct zfcp_adapter *adapter = fsf_req->adapter; struct fc_els_rscn *head; struct fc_els_rscn_page *page; u16 i; @@ -263,6 +260,22 @@ static void zfcp_fc_incoming_rscn(struct zfcp_fsf_req *fsf_req) no_entries = be16_to_cpu(head->rscn_plen) / sizeof(struct fc_els_rscn_page); + if (no_entries > 1) { + /* handle failed ports */ + unsigned long flags; + struct zfcp_port *port; + + read_lock_irqsave(&adapter->port_list_lock, flags); + list_for_each_entry(port, &adapter->port_list, list) { + if (port->d_id) + continue; + zfcp_erp_port_reopen(port, + ZFCP_STATUS_COMMON_ERP_FAILED, + "fcrscn1"); + } + read_unlock_irqrestore(&adapter->port_list_lock, flags); + } + for (i = 1; i < no_entries; i++) { /* skip head and start with 1st element */ page++; -- cgit v1.2.3 From 6dc6a944d58aea3d9de3828318b0fffdb60a7097 Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Wed, 20 Mar 2019 14:56:51 -0500 Subject: scsi: ibmvfc: Remove "failed" from logged errors The text of messages logged with ibmvfc_log_error() always contain the term "failed". In the case of cancelled commands during EH they are reported back by the VIOS using error codes. This can be confusing to somebody looking at these log messages as to whether a command was successfully cancelled. The following real log message for example it is unclear if the transaction was actaully cancelled. <6>sd 0:0:1:1: Cancelling outstanding commands. <3>sd 0:0:1:1: [sde] Command (28) failed: transaction cancelled (2:6) flags: 0 fcp_rsp: 0, resid=0, scsi_status: 0 Remove prefixing of "failed" to all error logged messages. The ibmvfc_log_error() function translates the returned error/status codes to a human readable message already. Signed-off-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi/ibmvfc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index dbaa4f131433..c3ce27039552 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -1494,7 +1494,7 @@ static void ibmvfc_log_error(struct ibmvfc_event *evt) if (rsp->flags & FCP_RSP_LEN_VALID) rsp_code = rsp->data.info.rsp_code; - scmd_printk(KERN_ERR, cmnd, "Command (%02X) failed: %s (%x:%x) " + scmd_printk(KERN_ERR, cmnd, "Command (%02X) : %s (%x:%x) " "flags: %x fcp_rsp: %x, resid=%d, scsi_status: %x\n", cmnd->cmnd[0], err, vfc_cmd->status, vfc_cmd->error, rsp->flags, rsp_code, scsi_get_resid(cmnd), rsp->scsi_status); -- cgit v1.2.3 From 95237c25d8d08ebc451dd2d793f7e765f57b0c9f Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Wed, 20 Mar 2019 14:56:52 -0500 Subject: scsi: ibmvfc: Add failed PRLI to cmd_status lookup array The VIOS uses the SCSI_ERROR class to report PRLI failures. These errors are indicated with the combination of a IBMVFC_FC_SCSI_ERROR return status and 0x8000 error code. Add these codes to cmd_status[] with appropriate human readable error message. Signed-off-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi/ibmvfc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index c3ce27039552..18ee2a8ec3d5 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -139,6 +139,7 @@ static const struct { { IBMVFC_FC_FAILURE, IBMVFC_VENDOR_SPECIFIC, DID_ERROR, 1, 1, "vendor specific" }, { IBMVFC_FC_SCSI_ERROR, 0, DID_OK, 1, 0, "SCSI error" }, + { IBMVFC_FC_SCSI_ERROR, IBMVFC_COMMAND_FAILED, DID_ERROR, 0, 1, "PRLI to device failed." }, }; static void ibmvfc_npiv_login(struct ibmvfc_host *); -- cgit v1.2.3 From 3e6f7de43f4960fba8322b16531b0d6624a9322d Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Wed, 20 Mar 2019 14:56:53 -0500 Subject: scsi: ibmvfc: Byte swap status and error codes when logging Status and error codes are returned in big endian from the VIOS. The values are translated into a human readable format when logged, but the values are also logged. This patch byte swaps those values so that they are consistent between BE and LE platforms. Signed-off-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi/ibmvfc.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index 18ee2a8ec3d5..33dda4d32f65 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -1497,7 +1497,7 @@ static void ibmvfc_log_error(struct ibmvfc_event *evt) scmd_printk(KERN_ERR, cmnd, "Command (%02X) : %s (%x:%x) " "flags: %x fcp_rsp: %x, resid=%d, scsi_status: %x\n", - cmnd->cmnd[0], err, vfc_cmd->status, vfc_cmd->error, + cmnd->cmnd[0], err, be16_to_cpu(vfc_cmd->status), be16_to_cpu(vfc_cmd->error), rsp->flags, rsp_code, scsi_get_resid(cmnd), rsp->scsi_status); } @@ -2023,7 +2023,7 @@ static int ibmvfc_reset_device(struct scsi_device *sdev, int type, char *desc) sdev_printk(KERN_ERR, sdev, "%s reset failed: %s (%x:%x) " "flags: %x fcp_rsp: %x, scsi_status: %x\n", desc, ibmvfc_get_cmd_error(be16_to_cpu(rsp_iu.cmd.status), be16_to_cpu(rsp_iu.cmd.error)), - rsp_iu.cmd.status, rsp_iu.cmd.error, fc_rsp->flags, rsp_code, + be16_to_cpu(rsp_iu.cmd.status), be16_to_cpu(rsp_iu.cmd.error), fc_rsp->flags, rsp_code, fc_rsp->scsi_status); rsp_rc = -EIO; } else @@ -2382,7 +2382,7 @@ static int ibmvfc_abort_task_set(struct scsi_device *sdev) sdev_printk(KERN_ERR, sdev, "Abort failed: %s (%x:%x) " "flags: %x fcp_rsp: %x, scsi_status: %x\n", ibmvfc_get_cmd_error(be16_to_cpu(rsp_iu.cmd.status), be16_to_cpu(rsp_iu.cmd.error)), - rsp_iu.cmd.status, rsp_iu.cmd.error, fc_rsp->flags, rsp_code, + be16_to_cpu(rsp_iu.cmd.status), be16_to_cpu(rsp_iu.cmd.error), fc_rsp->flags, rsp_code, fc_rsp->scsi_status); rsp_rc = -EIO; } else @@ -3349,7 +3349,7 @@ static void ibmvfc_tgt_prli_done(struct ibmvfc_event *evt) tgt_log(tgt, level, "Process Login failed: %s (%x:%x) rc=0x%02X\n", ibmvfc_get_cmd_error(be16_to_cpu(rsp->status), be16_to_cpu(rsp->error)), - rsp->status, rsp->error, status); + be16_to_cpu(rsp->status), be16_to_cpu(rsp->error), status); break; } @@ -3447,9 +3447,10 @@ static void ibmvfc_tgt_plogi_done(struct ibmvfc_event *evt) ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_DEL_RPORT); tgt_log(tgt, level, "Port Login failed: %s (%x:%x) %s (%x) %s (%x) rc=0x%02X\n", - ibmvfc_get_cmd_error(be16_to_cpu(rsp->status), be16_to_cpu(rsp->error)), rsp->status, rsp->error, - ibmvfc_get_fc_type(be16_to_cpu(rsp->fc_type)), rsp->fc_type, - ibmvfc_get_ls_explain(be16_to_cpu(rsp->fc_explain)), rsp->fc_explain, status); + ibmvfc_get_cmd_error(be16_to_cpu(rsp->status), be16_to_cpu(rsp->error)), + be16_to_cpu(rsp->status), be16_to_cpu(rsp->error), + ibmvfc_get_fc_type(be16_to_cpu(rsp->fc_type)), be16_to_cpu(rsp->fc_type), + ibmvfc_get_ls_explain(be16_to_cpu(rsp->fc_explain)), be16_to_cpu(rsp->fc_explain), status); break; } @@ -3620,7 +3621,7 @@ static void ibmvfc_tgt_adisc_done(struct ibmvfc_event *evt) fc_explain = (be32_to_cpu(mad->fc_iu.response[1]) & 0x0000ff00) >> 8; tgt_info(tgt, "ADISC failed: %s (%x:%x) %s (%x) %s (%x) rc=0x%02X\n", ibmvfc_get_cmd_error(be16_to_cpu(mad->iu.status), be16_to_cpu(mad->iu.error)), - mad->iu.status, mad->iu.error, + be16_to_cpu(mad->iu.status), be16_to_cpu(mad->iu.error), ibmvfc_get_fc_type(fc_reason), fc_reason, ibmvfc_get_ls_explain(fc_explain), fc_explain, status); break; @@ -3832,9 +3833,10 @@ static void ibmvfc_tgt_query_target_done(struct ibmvfc_event *evt) tgt_log(tgt, level, "Query Target failed: %s (%x:%x) %s (%x) %s (%x) rc=0x%02X\n", ibmvfc_get_cmd_error(be16_to_cpu(rsp->status), be16_to_cpu(rsp->error)), - rsp->status, rsp->error, ibmvfc_get_fc_type(be16_to_cpu(rsp->fc_type)), - rsp->fc_type, ibmvfc_get_gs_explain(be16_to_cpu(rsp->fc_explain)), - rsp->fc_explain, status); + be16_to_cpu(rsp->status), be16_to_cpu(rsp->error), + ibmvfc_get_fc_type(be16_to_cpu(rsp->fc_type)), be16_to_cpu(rsp->fc_type), + ibmvfc_get_gs_explain(be16_to_cpu(rsp->fc_explain)), be16_to_cpu(rsp->fc_explain), + status); break; } @@ -3960,7 +3962,7 @@ static void ibmvfc_discover_targets_done(struct ibmvfc_event *evt) level += ibmvfc_retry_host_init(vhost); ibmvfc_log(vhost, level, "Discover Targets failed: %s (%x:%x)\n", ibmvfc_get_cmd_error(be16_to_cpu(rsp->status), be16_to_cpu(rsp->error)), - rsp->status, rsp->error); + be16_to_cpu(rsp->status), be16_to_cpu(rsp->error)); break; case IBMVFC_MAD_DRIVER_FAILED: break; @@ -4025,7 +4027,7 @@ static void ibmvfc_npiv_login_done(struct ibmvfc_event *evt) ibmvfc_link_down(vhost, IBMVFC_LINK_DEAD); ibmvfc_log(vhost, level, "NPIV Login failed: %s (%x:%x)\n", ibmvfc_get_cmd_error(be16_to_cpu(rsp->status), be16_to_cpu(rsp->error)), - rsp->status, rsp->error); + be16_to_cpu(rsp->status), be16_to_cpu(rsp->error)); ibmvfc_free_event(evt); return; case IBMVFC_MAD_CRQ_ERROR: -- cgit v1.2.3 From d6e2635b9cf7982102750c5d9e4ba1474afa0981 Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Wed, 20 Mar 2019 14:56:54 -0500 Subject: scsi: ibmvfc: Clean up transport events No change to functionality. Simply make transport event messages a little clearer, and rework CRQ format enums such that we have separate enums for INIT messages and XPORT events. [mkp: typo] Signed-off-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi/ibmvfc.c | 8 +++++--- drivers/scsi/ibmvscsi/ibmvfc.h | 7 ++++++- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index 33dda4d32f65..3ad997ac3510 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -2756,16 +2756,18 @@ static void ibmvfc_handle_crq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost) ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_NONE); if (crq->format == IBMVFC_PARTITION_MIGRATED) { /* We need to re-setup the interpartition connection */ - dev_info(vhost->dev, "Re-enabling adapter\n"); + dev_info(vhost->dev, "Partition migrated, Re-enabling adapter\n"); vhost->client_migrated = 1; ibmvfc_purge_requests(vhost, DID_REQUEUE); ibmvfc_link_down(vhost, IBMVFC_LINK_DOWN); ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_REENABLE); - } else { - dev_err(vhost->dev, "Virtual adapter failed (rc=%d)\n", crq->format); + } else if (crq->format == IBMVFC_PARTNER_FAILED || crq->format == IBMVFC_PARTNER_DEREGISTER) { + dev_err(vhost->dev, "Host partner adapter deregistered or failed (rc=%d)\n", crq->format); ibmvfc_purge_requests(vhost, DID_ERROR); ibmvfc_link_down(vhost, IBMVFC_LINK_DOWN); ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_RESET); + } else { + dev_err(vhost->dev, "Received unknown transport event from partner (rc=%d)\n", crq->format); } return; case IBMVFC_CRQ_CMD_RSP: diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h index b81a53c4a9a8..459cc288ba1d 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.h +++ b/drivers/scsi/ibmvscsi/ibmvfc.h @@ -78,9 +78,14 @@ enum ibmvfc_crq_valid { IBMVFC_CRQ_XPORT_EVENT = 0xFF, }; -enum ibmvfc_crq_format { +enum ibmvfc_crq_init_msg { IBMVFC_CRQ_INIT = 0x01, IBMVFC_CRQ_INIT_COMPLETE = 0x02, +}; + +enum ibmvfc_crq_xport_evts { + IBMVFC_PARTNER_FAILED = 0x01, + IBMVFC_PARTNER_DEREGISTER = 0x02, IBMVFC_PARTITION_MIGRATED = 0x06, }; -- cgit v1.2.3