summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBart Van Assche <bvanassche@acm.org>2013-02-21 17:20:00 +0000
committerRoland Dreier <roland@purestorage.com>2013-02-25 09:31:14 -0800
commit2ce19e72f4d570c87e025ee6fca4eae699a8b712 (patch)
treef9055b2a0390954b222450ea93dffdab0316c674
parentc7c4e7ff8047e43c45628b85ac200582e9404c39 (diff)
downloadlwn-2ce19e72f4d570c87e025ee6fca4eae699a8b712.tar.gz
lwn-2ce19e72f4d570c87e025ee6fca4eae699a8b712.zip
IB/srp: Fail I/O requests if the transport is offline
If an SRP target is no longer reachable and srp_reset_host() fails to reconnect then ib_srp will invoke scsi_remove_host(). That function will invoke __scsi_remove_device() for each LUN. And that last function will change the device state from SDEV_TRANSPORT_OFFLINE into SDEV_CANCEL. Certain user space software, e.g. older versions of multipathd, continue queueing I/O to SCSI devices that are in the SDEV_CANCEL state. If these I/O requests are submitted as SG_IO that means that the REQ_PREEMPT flag will be set and hence that these requests will be passed to srp_queuecommand(). These requests will time out. If new requests are queued fast enough from user space these active requests will prevent __scsi_remove_device() to finish. Avoid this by failing I/O requests in the SDEV_CANCEL state if the transport is offline. Introduce a new variable to keep track of the transport state instead of failing requests if (!target->connected || target->qp_in_error), so that the SCSI error handler has a chance to retry commands after a transport layer failure occurred. Signed-off-by: Bart Van Assche <bvanassche@acm.org> Cc: <stable@vger.kernel.org> # 3.8 Signed-off-by: Roland Dreier <roland@purestorage.com>
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c7
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.h1
2 files changed, 8 insertions, 0 deletions
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 8a7eb9f98a0c..7ccf3284dda3 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -734,6 +734,7 @@ static int srp_reconnect_target(struct srp_target_port *target)
scsi_target_unblock(&shost->shost_gendev, ret == 0 ? SDEV_RUNNING :
SDEV_TRANSPORT_OFFLINE);
+ target->transport_offline = !!ret;
if (ret)
goto err;
@@ -1353,6 +1354,12 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
unsigned long flags;
int len;
+ if (unlikely(target->transport_offline)) {
+ scmnd->result = DID_NO_CONNECT << 16;
+ scmnd->scsi_done(scmnd);
+ return 0;
+ }
+
spin_lock_irqsave(&target->lock, flags);
iu = __srp_get_tx_iu(target, SRP_IU_CMD);
if (!iu)
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index de2d0b3c0bfe..66fbedda4571 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -140,6 +140,7 @@ struct srp_target_port {
unsigned int cmd_sg_cnt;
unsigned int indirect_size;
bool allow_ext_sg;
+ bool transport_offline;
/* Everything above this point is used in the hot path of
* command processing. Try to keep them packed into cachelines.