From c1d454246c1339388ed0788f34f88ee12ad58ef3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 14 Aug 2013 15:31:52 +0000 Subject: libfc: Source code comment spelling fixes Change 'initiaive' into 'initiative', 'remainig' into 'remaining' and change 'exected' into 'expected'. Signed-off-by: Bart Van Assche Signed-off-by: Robert Love --- include/scsi/fc/fc_fc2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/scsi') diff --git a/include/scsi/fc/fc_fc2.h b/include/scsi/fc/fc_fc2.h index f87777d0d5bd..0b2671431305 100644 --- a/include/scsi/fc/fc_fc2.h +++ b/include/scsi/fc/fc_fc2.h @@ -104,7 +104,7 @@ struct fc_esb { * esb_e_stat - flags from FC-FS-2 T11/1619-D Rev 0.90. */ #define ESB_ST_RESP (1 << 31) /* responder to exchange */ -#define ESB_ST_SEQ_INIT (1 << 30) /* port holds sequence initiaive */ +#define ESB_ST_SEQ_INIT (1 << 30) /* port holds sequence initiative */ #define ESB_ST_COMPLETE (1 << 29) /* exchange is complete */ #define ESB_ST_ABNORMAL (1 << 28) /* abnormal ending condition */ #define ESB_ST_REC_QUAL (1 << 26) /* recovery qualifier active */ -- cgit v1.2.3 From 7030fd626129ec4d616784516a462d317c251d39 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sat, 17 Aug 2013 20:34:43 +0000 Subject: libfc: Do not invoke the response handler after fc_exch_done() While the FCoE initiator driver invokes fc_exch_done() from inside the libfc response handler, FCoE target drivers typically invoke fc_exch_done() from outside the libfc response handler. The object fc_exch.arg points at may disappear as soon as fc_exch_done() has finished. So it's important not to invoke the response handler function after fc_exch_done() has finished. Modify libfc such that this guarantee is provided if fc_exch_done() is invoked from outside a response handler. This patch fixes a sporadic crash in FCoE target implementations after a command has been aborted. Signed-off-by: Bart Van Assche Cc: Neil Horman Signed-off-by: Robert Love --- drivers/scsi/libfc/fc_exch.c | 131 ++++++++++++++++++++++++++++++------------- include/scsi/libfc.h | 9 +++ 2 files changed, 101 insertions(+), 39 deletions(-) (limited to 'include/scsi') diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c index 47ebc7b1e143..1b3a09473452 100644 --- a/drivers/scsi/libfc/fc_exch.c +++ b/drivers/scsi/libfc/fc_exch.c @@ -381,6 +381,8 @@ static void fc_exch_timer_set(struct fc_exch *ep, unsigned int timer_msec) /** * fc_exch_done_locked() - Complete an exchange with the exchange lock held * @ep: The exchange that is complete + * + * Note: May sleep if invoked from outside a response handler. */ static int fc_exch_done_locked(struct fc_exch *ep) { @@ -392,7 +394,6 @@ static int fc_exch_done_locked(struct fc_exch *ep) * ep, and in that case we only clear the resp and set it as * complete, so it can be reused by the timer to send the rrq. */ - ep->resp = NULL; if (ep->state & FC_EX_DONE) return rc; ep->esb_stat |= ESB_ST_COMPLETE; @@ -589,6 +590,8 @@ static struct fc_seq *fc_seq_start_next(struct fc_seq *sp) /* * Set the response handler for the exchange associated with a sequence. + * + * Note: May sleep if invoked from outside a response handler. */ static void fc_seq_set_resp(struct fc_seq *sp, void (*resp)(struct fc_seq *, struct fc_frame *, @@ -596,8 +599,18 @@ static void fc_seq_set_resp(struct fc_seq *sp, void *arg) { struct fc_exch *ep = fc_seq_exch(sp); + DEFINE_WAIT(wait); spin_lock_bh(&ep->ex_lock); + while (ep->resp_active && ep->resp_task != current) { + prepare_to_wait(&ep->resp_wq, &wait, TASK_UNINTERRUPTIBLE); + spin_unlock_bh(&ep->ex_lock); + + schedule(); + + spin_lock_bh(&ep->ex_lock); + } + finish_wait(&ep->resp_wq, &wait); ep->resp = resp; ep->arg = arg; spin_unlock_bh(&ep->ex_lock); @@ -680,6 +693,61 @@ static int fc_seq_exch_abort(const struct fc_seq *req_sp, return error; } +/** + * fc_invoke_resp() - invoke ep->resp() + * + * Notes: + * It is assumed that after initialization finished (this means the + * first unlock of ex_lock after fc_exch_alloc()) ep->resp and ep->arg are + * modified only via fc_seq_set_resp(). This guarantees that none of these + * two variables changes if ep->resp_active > 0. + * + * If an fc_seq_set_resp() call is busy modifying ep->resp and ep->arg when + * this function is invoked, the first spin_lock_bh() call in this function + * will wait until fc_seq_set_resp() has finished modifying these variables. + * + * Since fc_exch_done() invokes fc_seq_set_resp() it is guaranteed that that + * ep->resp() won't be invoked after fc_exch_done() has returned. + * + * The response handler itself may invoke fc_exch_done(), which will clear the + * ep->resp pointer. + * + * Return value: + * Returns true if and only if ep->resp has been invoked. + */ +static bool fc_invoke_resp(struct fc_exch *ep, struct fc_seq *sp, + struct fc_frame *fp) +{ + void (*resp)(struct fc_seq *, struct fc_frame *fp, void *arg); + void *arg; + bool res = false; + + spin_lock_bh(&ep->ex_lock); + ep->resp_active++; + if (ep->resp_task != current) + ep->resp_task = !ep->resp_task ? current : NULL; + resp = ep->resp; + arg = ep->arg; + spin_unlock_bh(&ep->ex_lock); + + if (resp) { + resp(sp, fp, arg); + res = true; + } else if (!IS_ERR(fp)) { + fc_frame_free(fp); + } + + spin_lock_bh(&ep->ex_lock); + if (--ep->resp_active == 0) + ep->resp_task = NULL; + spin_unlock_bh(&ep->ex_lock); + + if (ep->resp_active == 0) + wake_up(&ep->resp_wq); + + return res; +} + /** * fc_exch_timeout() - Handle exchange timer expiration * @work: The work_struct identifying the exchange that timed out @@ -689,8 +757,6 @@ static void fc_exch_timeout(struct work_struct *work) struct fc_exch *ep = container_of(work, struct fc_exch, timeout_work.work); struct fc_seq *sp = &ep->seq; - void (*resp)(struct fc_seq *, struct fc_frame *fp, void *arg); - void *arg; u32 e_stat; int rc = 1; @@ -708,16 +774,13 @@ static void fc_exch_timeout(struct work_struct *work) fc_exch_rrq(ep); goto done; } else { - resp = ep->resp; - arg = ep->arg; - ep->resp = NULL; if (e_stat & ESB_ST_ABNORMAL) rc = fc_exch_done_locked(ep); spin_unlock_bh(&ep->ex_lock); if (!rc) fc_exch_delete(ep); - if (resp) - resp(sp, ERR_PTR(-FC_EX_TIMEOUT), arg); + fc_invoke_resp(ep, sp, ERR_PTR(-FC_EX_TIMEOUT)); + fc_seq_set_resp(sp, NULL, ep->arg); fc_seq_exch_abort(sp, 2 * ep->r_a_tov); goto done; } @@ -804,6 +867,8 @@ hit: ep->f_ctl = FC_FC_FIRST_SEQ; /* next seq is first seq */ ep->rxid = FC_XID_UNKNOWN; ep->class = mp->class; + ep->resp_active = 0; + init_waitqueue_head(&ep->resp_wq); INIT_DELAYED_WORK(&ep->timeout_work, fc_exch_timeout); out: return ep; @@ -864,6 +929,8 @@ static struct fc_exch *fc_exch_find(struct fc_exch_mgr *mp, u16 xid) * fc_exch_done() - Indicate that an exchange/sequence tuple is complete and * the memory allocated for the related objects may be freed. * @sp: The sequence that has completed + * + * Note: May sleep if invoked from outside a response handler. */ static void fc_exch_done(struct fc_seq *sp) { @@ -873,6 +940,8 @@ static void fc_exch_done(struct fc_seq *sp) spin_lock_bh(&ep->ex_lock); rc = fc_exch_done_locked(ep); spin_unlock_bh(&ep->ex_lock); + + fc_seq_set_resp(sp, NULL, ep->arg); if (!rc) fc_exch_delete(ep); } @@ -1436,9 +1505,7 @@ static void fc_exch_recv_req(struct fc_lport *lport, struct fc_exch_mgr *mp, * If new exch resp handler is valid then call that * first. */ - if (ep->resp) - ep->resp(sp, fp, ep->arg); - else + if (!fc_invoke_resp(ep, sp, fp)) lport->tt.lport_recv(lport, fp); fc_exch_release(ep); /* release from lookup */ } else { @@ -1462,8 +1529,6 @@ static void fc_exch_recv_seq_resp(struct fc_exch_mgr *mp, struct fc_frame *fp) struct fc_exch *ep; enum fc_sof sof; u32 f_ctl; - void (*resp)(struct fc_seq *, struct fc_frame *fp, void *arg); - void *ex_resp_arg; int rc; ep = fc_exch_find(mp, ntohs(fh->fh_ox_id)); @@ -1506,14 +1571,11 @@ static void fc_exch_recv_seq_resp(struct fc_exch_mgr *mp, struct fc_frame *fp) if (fc_sof_needs_ack(sof)) fc_seq_send_ack(sp, fp); - resp = ep->resp; - ex_resp_arg = ep->arg; if (fh->fh_type != FC_TYPE_FCP && fr_eof(fp) == FC_EOF_T && (f_ctl & (FC_FC_LAST_SEQ | FC_FC_END_SEQ)) == (FC_FC_LAST_SEQ | FC_FC_END_SEQ)) { spin_lock_bh(&ep->ex_lock); - resp = ep->resp; rc = fc_exch_done_locked(ep); WARN_ON(fc_seq_exch(sp) != ep); spin_unlock_bh(&ep->ex_lock); @@ -1534,10 +1596,8 @@ static void fc_exch_recv_seq_resp(struct fc_exch_mgr *mp, struct fc_frame *fp) * If new exch resp handler is valid then call that * first. */ - if (resp) - resp(sp, fp, ex_resp_arg); - else - fc_frame_free(fp); + fc_invoke_resp(ep, sp, fp); + fc_exch_release(ep); return; rel: @@ -1576,8 +1636,6 @@ static void fc_exch_recv_resp(struct fc_exch_mgr *mp, struct fc_frame *fp) */ static void fc_exch_abts_resp(struct fc_exch *ep, struct fc_frame *fp) { - void (*resp)(struct fc_seq *, struct fc_frame *fp, void *arg); - void *ex_resp_arg; struct fc_frame_header *fh; struct fc_ba_acc *ap; struct fc_seq *sp; @@ -1622,9 +1680,6 @@ static void fc_exch_abts_resp(struct fc_exch *ep, struct fc_frame *fp) break; } - resp = ep->resp; - ex_resp_arg = ep->arg; - /* do we need to do some other checks here. Can we reuse more of * fc_exch_recv_seq_resp */ @@ -1636,17 +1691,14 @@ static void fc_exch_abts_resp(struct fc_exch *ep, struct fc_frame *fp) ntoh24(fh->fh_f_ctl) & FC_FC_LAST_SEQ) rc = fc_exch_done_locked(ep); spin_unlock_bh(&ep->ex_lock); + + fc_exch_hold(ep); if (!rc) fc_exch_delete(ep); - - if (resp) - resp(sp, fp, ex_resp_arg); - else - fc_frame_free(fp); - + fc_invoke_resp(ep, sp, fp); if (has_rec) fc_exch_timer_set(ep, ep->r_a_tov); - + fc_exch_release(ep); } /** @@ -1768,32 +1820,33 @@ static void fc_seq_ls_rjt(struct fc_frame *rx_fp, enum fc_els_rjt_reason reason, /** * fc_exch_reset() - Reset an exchange * @ep: The exchange to be reset + * + * Note: May sleep if invoked from outside a response handler. */ static void fc_exch_reset(struct fc_exch *ep) { struct fc_seq *sp; - void (*resp)(struct fc_seq *, struct fc_frame *, void *); - void *arg; int rc = 1; spin_lock_bh(&ep->ex_lock); fc_exch_abort_locked(ep, 0); ep->state |= FC_EX_RST_CLEANUP; fc_exch_timer_cancel(ep); - resp = ep->resp; - ep->resp = NULL; if (ep->esb_stat & ESB_ST_REC_QUAL) atomic_dec(&ep->ex_refcnt); /* drop hold for rec_qual */ ep->esb_stat &= ~ESB_ST_REC_QUAL; - arg = ep->arg; sp = &ep->seq; rc = fc_exch_done_locked(ep); spin_unlock_bh(&ep->ex_lock); + + fc_exch_hold(ep); + if (!rc) fc_exch_delete(ep); - if (resp) - resp(sp, ERR_PTR(-FC_EX_CLOSED), arg); + fc_invoke_resp(ep, sp, ERR_PTR(-FC_EX_CLOSED)); + fc_seq_set_resp(sp, NULL, ep->arg); + fc_exch_release(ep); } /** diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h index e1379b4e8faf..52beadf9a29b 100644 --- a/include/scsi/libfc.h +++ b/include/scsi/libfc.h @@ -410,6 +410,12 @@ struct fc_seq { * @fh_type: The frame type * @class: The class of service * @seq: The sequence in use on this exchange + * @resp_active: Number of tasks that are concurrently executing @resp(). + * @resp_task: If @resp_active > 0, either the task executing @resp(), the + * task that has been interrupted to execute the soft-IRQ + * executing @resp() or NULL if more than one task is executing + * @resp concurrently. + * @resp_wq: Waitqueue for the tasks waiting on @resp_active. * @resp: Callback for responses on this exchange * @destructor: Called when destroying the exchange * @arg: Passed as a void pointer to the resp() callback @@ -441,6 +447,9 @@ struct fc_exch { u32 r_a_tov; u32 f_ctl; struct fc_seq seq; + int resp_active; + struct task_struct *resp_task; + wait_queue_head_t resp_wq; void (*resp)(struct fc_seq *, struct fc_frame *, void *); void *arg; void (*destructor)(struct fc_seq *, void *); -- cgit v1.2.3 From 9d34876f820d55c94bd0b2a2ed3d2e2976cbd997 Mon Sep 17 00:00:00 2001 From: Robert Love Date: Thu, 5 Sep 2013 07:47:27 +0000 Subject: libfcoe: Make fcoe_sysfs optional / fix fnic NULL exception fnic doesn't use any of the create/destroy/enable/disable interfaces either from the (legacy) module paramaters or the (new) fcoe_sysfs interfaces. When fcoe_sysfs was introduced fnic wasn't changed since it wasn't using the interfaces. libfcoe incorrectly assumed that that all of its users were using fcoe_sysfs and when adding and deleting FCFs would assume the existance of a fcoe_ctlr_device. fnic was not allocating this structure because it doesn't care about the standard user interfaces (fnic starts on link only). If/When libfcoe tried to use the fcoe_ctlr_device's lock for the first time a NULL pointer exception would be triggered. Since fnic doesn't care about sysfs or user interfaces, the solution is to drop libfcoe's assumption that all drivers are using fcoe_sysfs. This patch accomplishes this by changing some of the structure relationships. We need a way to determine when a LLD is using fcoe_sysfs or not and we can do that by checking for the existance of the fcoe_ctlr_device. Prior to this patch, it was assumed that the fcoe_ctlr structure was allocated with the fcoe_ctlr_device and immediately followed it in memory. To reach the fcoe_ctlr_device we would simply go back in memory from the fcoe_ctlr to get the fcoe_ctlr_device. Since fnic doesn't allocate the fcoe_ctlr_device, we cannot keep that assumption. This patch adds a pointer from the fcoe_ctlr to the fcoe_ctlr_device. For bnx2fc and fcoe we will continue to allocate the two structures together, but then we'll set the ctlr->cdev pointer to point at the fcoe_ctlr_device. fnic will not change and will continue to allocate the fcoe_ctlr itself, and ctlr->cdev will remain NULL. When libfcoe adds fcoe_fcf's to the fcoe_ctlr it will check if ctlr->cdev is set and only if so will it continue to interact with fcoe_sysfs. Signed-off-by: Robert Love Acked-by: Neil Horman Tested-by: Hiral Patel --- drivers/scsi/bnx2fc/bnx2fc_fcoe.c | 1 + drivers/scsi/fcoe/fcoe.c | 1 + drivers/scsi/fcoe/fcoe_ctlr.c | 94 ++++++++++++++++++++++++++------------- include/scsi/libfcoe.h | 7 ++- 4 files changed, 71 insertions(+), 32 deletions(-) (limited to 'include/scsi') diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c index 69ac55495c1d..27f2cc4b97a5 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c @@ -1381,6 +1381,7 @@ struct bnx2fc_interface *bnx2fc_interface_create(struct bnx2fc_hba *hba, return NULL; } ctlr = fcoe_ctlr_device_priv(ctlr_dev); + ctlr->cdev = ctlr_dev; interface = fcoe_ctlr_priv(ctlr); dev_hold(netdev); kref_init(&interface->kref); diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index dff40b2fccbd..8626988e12a5 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -408,6 +408,7 @@ static struct fcoe_interface *fcoe_interface_create(struct net_device *netdev, } ctlr = fcoe_ctlr_device_priv(ctlr_dev); + ctlr->cdev = ctlr_dev; fcoe = fcoe_ctlr_priv(ctlr); dev_hold(netdev); diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c index 692c6535fe75..75efdbc54ef8 100644 --- a/drivers/scsi/fcoe/fcoe_ctlr.c +++ b/drivers/scsi/fcoe/fcoe_ctlr.c @@ -160,10 +160,16 @@ void fcoe_ctlr_init(struct fcoe_ctlr *fip, enum fip_state mode) } EXPORT_SYMBOL(fcoe_ctlr_init); +/** + * fcoe_sysfs_fcf_add() - Add a fcoe_fcf{,_device} to a fcoe_ctlr{,_device} + * @new: The newly discovered FCF + * + * Called with fip->ctlr_mutex held + */ static int fcoe_sysfs_fcf_add(struct fcoe_fcf *new) { struct fcoe_ctlr *fip = new->fip; - struct fcoe_ctlr_device *ctlr_dev = fcoe_ctlr_to_ctlr_dev(fip); + struct fcoe_ctlr_device *ctlr_dev; struct fcoe_fcf_device *temp, *fcf_dev; int rc = -ENOMEM; @@ -174,8 +180,6 @@ static int fcoe_sysfs_fcf_add(struct fcoe_fcf *new) if (!temp) goto out; - mutex_lock(&ctlr_dev->lock); - temp->fabric_name = new->fabric_name; temp->switch_name = new->switch_name; temp->fc_map = new->fc_map; @@ -185,55 +189,83 @@ static int fcoe_sysfs_fcf_add(struct fcoe_fcf *new) temp->fka_period = new->fka_period; temp->selected = 0; /* default to unselected */ - fcf_dev = fcoe_fcf_device_add(ctlr_dev, temp); - if (unlikely(!fcf_dev)) - goto unlock; - /* - * The fcoe_sysfs layer can return a CONNECTED fcf that - * has a priv (fcf was never deleted) or a CONNECTED fcf - * that doesn't have a priv (fcf was deleted). However, - * libfcoe will always delete FCFs before trying to add - * them. This is ensured because both recv_adv and - * age_fcfs are protected by the the fcoe_ctlr's mutex. - * This means that we should never get a FCF with a - * non-NULL priv pointer. + * If ctlr_dev doesn't exist then it means we're a libfcoe user + * who doesn't use fcoe_syfs and didn't allocate a fcoe_ctlr_device. + * fnic would be an example of a driver with this behavior. In this + * case we want to add the fcoe_fcf to the fcoe_ctlr list, but we + * don't want to make sysfs changes. */ - BUG_ON(fcf_dev->priv); - fcf_dev->priv = new; - new->fcf_dev = fcf_dev; + ctlr_dev = fcoe_ctlr_to_ctlr_dev(fip); + if (ctlr_dev) { + mutex_lock(&ctlr_dev->lock); + fcf_dev = fcoe_fcf_device_add(ctlr_dev, temp); + if (unlikely(!fcf_dev)) { + rc = -ENOMEM; + goto out; + } + + /* + * The fcoe_sysfs layer can return a CONNECTED fcf that + * has a priv (fcf was never deleted) or a CONNECTED fcf + * that doesn't have a priv (fcf was deleted). However, + * libfcoe will always delete FCFs before trying to add + * them. This is ensured because both recv_adv and + * age_fcfs are protected by the the fcoe_ctlr's mutex. + * This means that we should never get a FCF with a + * non-NULL priv pointer. + */ + BUG_ON(fcf_dev->priv); + + fcf_dev->priv = new; + new->fcf_dev = fcf_dev; + mutex_unlock(&ctlr_dev->lock); + } list_add(&new->list, &fip->fcfs); fip->fcf_count++; rc = 0; -unlock: - mutex_unlock(&ctlr_dev->lock); - out: kfree(temp); return rc; } +/** + * fcoe_sysfs_fcf_del() - Remove a fcoe_fcf{,_device} to a fcoe_ctlr{,_device} + * @new: The FCF to be removed + * + * Called with fip->ctlr_mutex held + */ static void fcoe_sysfs_fcf_del(struct fcoe_fcf *new) { struct fcoe_ctlr *fip = new->fip; - struct fcoe_ctlr_device *ctlr_dev = fcoe_ctlr_to_ctlr_dev(fip); + struct fcoe_ctlr_device *cdev; struct fcoe_fcf_device *fcf_dev; list_del(&new->list); fip->fcf_count--; - mutex_lock(&ctlr_dev->lock); - - fcf_dev = fcoe_fcf_to_fcf_dev(new); - WARN_ON(!fcf_dev); - new->fcf_dev = NULL; - fcoe_fcf_device_delete(fcf_dev); - kfree(new); - - mutex_unlock(&ctlr_dev->lock); + /* + * If ctlr_dev doesn't exist then it means we're a libfcoe user + * who doesn't use fcoe_syfs and didn't allocate a fcoe_ctlr_device + * or a fcoe_fcf_device. + * + * fnic would be an example of a driver with this behavior. In this + * case we want to remove the fcoe_fcf from the fcoe_ctlr list (above), + * but we don't want to make sysfs changes. + */ + cdev = fcoe_ctlr_to_ctlr_dev(fip); + if (cdev) { + mutex_lock(&cdev->lock); + fcf_dev = fcoe_fcf_to_fcf_dev(new); + WARN_ON(!fcf_dev); + new->fcf_dev = NULL; + fcoe_fcf_device_delete(fcf_dev); + kfree(new); + mutex_unlock(&cdev->lock); + } } /** diff --git a/include/scsi/libfcoe.h b/include/scsi/libfcoe.h index 4427393115ea..de7e3ee60f0c 100644 --- a/include/scsi/libfcoe.h +++ b/include/scsi/libfcoe.h @@ -90,6 +90,7 @@ enum fip_state { * @lp: &fc_lport: libfc local port. * @sel_fcf: currently selected FCF, or NULL. * @fcfs: list of discovered FCFs. + * @cdev: (Optional) pointer to sysfs fcoe_ctlr_device. * @fcf_count: number of discovered FCF entries. * @sol_time: time when a multicast solicitation was last sent. * @sel_time: time after which to select an FCF. @@ -127,6 +128,7 @@ struct fcoe_ctlr { struct fc_lport *lp; struct fcoe_fcf *sel_fcf; struct list_head fcfs; + struct fcoe_ctlr_device *cdev; u16 fcf_count; unsigned long sol_time; unsigned long sel_time; @@ -168,8 +170,11 @@ static inline void *fcoe_ctlr_priv(const struct fcoe_ctlr *ctlr) return (void *)(ctlr + 1); } +/* + * This assumes that the fcoe_ctlr (x) is allocated with the fcoe_ctlr_device. + */ #define fcoe_ctlr_to_ctlr_dev(x) \ - (struct fcoe_ctlr_device *)(((struct fcoe_ctlr_device *)(x)) - 1) + (x)->cdev /** * struct fcoe_fcf - Fibre-Channel Forwarder -- cgit v1.2.3 From 3af142fea7da55718b733b25be9d54ddb87acfcc Mon Sep 17 00:00:00 2001 From: Adheer Chandravanshi Date: Tue, 17 Sep 2013 07:54:48 -0400 Subject: [SCSI] scsi_transport_iscsi: Add support to set CHAP entries For offload iSCSI like qla4xxx, CHAP entries are stored in adapter's flash. This patch adds support to add/update CHAP entries in adapter's flash using iscsi tools, like Open-iSCSI. Signed-off-by: Adheer Chandravanshi Signed-off-by: Vikas Chaudhary Signed-off-by: James Bottomley --- drivers/scsi/scsi_transport_iscsi.c | 26 ++++++++++++++++++++++++++ include/scsi/iscsi_if.h | 17 +++++++++++++++++ include/scsi/scsi_transport_iscsi.h | 1 + 3 files changed, 44 insertions(+) (limited to 'include/scsi') diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index e4a989fa477d..63a6ca49d4e5 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2744,6 +2744,28 @@ exit_get_chap: return err; } +static int iscsi_set_chap(struct iscsi_transport *transport, + struct iscsi_uevent *ev, uint32_t len) +{ + char *data = (char *)ev + sizeof(*ev); + struct Scsi_Host *shost; + int err = 0; + + if (!transport->set_chap) + return -ENOSYS; + + shost = scsi_host_lookup(ev->u.set_path.host_no); + if (!shost) { + pr_err("%s could not find host no %u\n", + __func__, ev->u.set_path.host_no); + return -ENODEV; + } + + err = transport->set_chap(shost, data, len); + scsi_host_put(shost); + return err; +} + static int iscsi_delete_chap(struct iscsi_transport *transport, struct iscsi_uevent *ev) { @@ -3234,6 +3256,10 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) case ISCSI_UEVENT_LOGOUT_FLASHNODE_SID: err = iscsi_logout_flashnode_sid(transport, ev); break; + case ISCSI_UEVENT_SET_CHAP: + err = iscsi_set_chap(transport, ev, + nlmsg_attrlen(nlh, sizeof(*ev))); + break; default: err = -ENOSYS; break; diff --git a/include/scsi/iscsi_if.h b/include/scsi/iscsi_if.h index 13d81c5c4ebf..5d6ed6cf12cc 100644 --- a/include/scsi/iscsi_if.h +++ b/include/scsi/iscsi_if.h @@ -69,6 +69,7 @@ enum iscsi_uevent_e { ISCSI_UEVENT_LOGIN_FLASHNODE = UEVENT_BASE + 28, ISCSI_UEVENT_LOGOUT_FLASHNODE = UEVENT_BASE + 29, ISCSI_UEVENT_LOGOUT_FLASHNODE_SID = UEVENT_BASE + 30, + ISCSI_UEVENT_SET_CHAP = UEVENT_BASE + 31, /* up events */ ISCSI_KEVENT_RECV_PDU = KEVENT_BASE + 1, @@ -309,8 +310,16 @@ enum iscsi_param_type { ISCSI_HOST_PARAM, /* iscsi_host_param */ ISCSI_NET_PARAM, /* iscsi_net_param */ ISCSI_FLASHNODE_PARAM, /* iscsi_flashnode_param */ + ISCSI_CHAP_PARAM, /* iscsi_chap_param */ }; +/* structure for minimalist usecase */ +struct iscsi_param_info { + uint32_t len; /* Actual length of the param value */ + uint16_t param; /* iscsi param */ + uint8_t value[0]; /* length sized value follows */ +} __packed; + struct iscsi_iface_param_info { uint32_t iface_num; /* iface number, 0 - n */ uint32_t len; /* Actual length of the param */ @@ -739,6 +748,14 @@ enum chap_type_e { CHAP_TYPE_IN, }; +enum iscsi_chap_param { + ISCSI_CHAP_PARAM_INDEX, + ISCSI_CHAP_PARAM_CHAP_TYPE, + ISCSI_CHAP_PARAM_USERNAME, + ISCSI_CHAP_PARAM_PASSWORD, + ISCSI_CHAP_PARAM_PASSWORD_LEN +}; + #define ISCSI_CHAP_AUTH_NAME_MAX_LEN 256 #define ISCSI_CHAP_AUTH_SECRET_MAX_LEN 256 struct iscsi_chap_rec { diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index d0f1602985e7..fe7c8f3e93f8 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -152,6 +152,7 @@ struct iscsi_transport { int (*get_chap) (struct Scsi_Host *shost, uint16_t chap_tbl_idx, uint32_t *num_entries, char *buf); int (*delete_chap) (struct Scsi_Host *shost, uint16_t chap_tbl_idx); + int (*set_chap) (struct Scsi_Host *shost, void *data, int len); int (*get_flashnode_param) (struct iscsi_bus_flash_session *fnode_sess, int param, char *buf); int (*set_flashnode_param) (struct iscsi_bus_flash_session *fnode_sess, -- cgit v1.2.3 From 6b1e5a45d4eaa75e28f2d170ea43ab8fc6dd34d8 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 23 Oct 2013 10:51:20 +0200 Subject: [SCSI] remove check for 'resetting' Field is now unused, so this is dead code. [jejb: remove resetting and last_reset from Scsi_Host] Signed-off-by: Hannes Reinecke Signed-off-by: James Bottomley --- drivers/scsi/scsi.c | 28 ---------------------------- include/scsi/scsi_host.h | 2 -- 2 files changed, 30 deletions(-) (limited to 'include/scsi') diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index eaa808e6ba91..fe0bcb18fb26 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -78,11 +78,6 @@ static void scsi_done(struct scsi_cmnd *cmd); * Definitions and constants. */ -#define MIN_RESET_DELAY (2*HZ) - -/* Do not call reset on error if we just did a reset within 15 sec. */ -#define MIN_RESET_PERIOD (15*HZ) - /* * Note - the initial logging level can be set here to log events at boot time. * After the system is up, you may enable logging via the /proc interface. @@ -658,7 +653,6 @@ EXPORT_SYMBOL(scsi_cmd_get_serial); int scsi_dispatch_cmd(struct scsi_cmnd *cmd) { struct Scsi_Host *host = cmd->device->host; - unsigned long timeout; int rtn = 0; atomic_inc(&cmd->device->iorequest_cnt); @@ -704,28 +698,6 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd) (cmd->device->lun << 5 & 0xe0); } - /* - * We will wait MIN_RESET_DELAY clock ticks after the last reset so - * we can avoid the drive not being ready. - */ - timeout = host->last_reset + MIN_RESET_DELAY; - - if (host->resetting && time_before(jiffies, timeout)) { - int ticks_remaining = timeout - jiffies; - /* - * NOTE: This may be executed from within an interrupt - * handler! This is bad, but for now, it'll do. The irq - * level of the interrupt handler has been masked out by the - * platform dependent interrupt handling code already, so the - * sti() here will not cause another call to the SCSI host's - * interrupt handler (assuming there is one irq-level per - * host). - */ - while (--ticks_remaining >= 0) - mdelay(1 + 999 / HZ); - host->resetting = 0; - } - scsi_log_send(cmd); /* diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 755243572219..a74b7d9afe8e 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -598,8 +598,6 @@ struct Scsi_Host { unsigned int host_eh_scheduled; /* EH scheduled without command */ unsigned int host_no; /* Used for IOCTL_GET_IDLUN, /proc/scsi et al. */ - int resetting; /* if set, it means that last_reset is a valid value */ - unsigned long last_reset; /* * These three parameters can be used to allow for wide scsi, -- cgit v1.2.3 From b45620229dd67ff1daffa8adce57f37b37860f78 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 23 Oct 2013 10:51:21 +0200 Subject: [SCSI] Add 'eh_deadline' to limit SCSI EH runtime This patchs adds an 'eh_deadline' sysfs attribute to the scsi host which limits the overall runtime of the SCSI EH. The 'eh_deadline' value is stored in the now obsolete field 'resetting'. When a command is failed the start time of the EH is stored in 'last_reset'. If the overall runtime of the SCSI EH is longer than last_reset + eh_deadline, the EH is short-circuited and falls through to issue a host reset only. [jejb: add comments in Scsi_Host about new fields] Signed-off-by: Hannes Reinecke Signed-off-by: James Bottomley --- drivers/scsi/hosts.c | 7 +++ drivers/scsi/scsi_error.c | 130 +++++++++++++++++++++++++++++++++++++++++++--- drivers/scsi/scsi_sysfs.c | 37 +++++++++++++ include/scsi/scsi_host.h | 5 ++ 4 files changed, 173 insertions(+), 6 deletions(-) (limited to 'include/scsi') diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index df0c3c71ea43..f334859024c0 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -316,6 +316,12 @@ static void scsi_host_dev_release(struct device *dev) kfree(shost); } +static unsigned int shost_eh_deadline; + +module_param_named(eh_deadline, shost_eh_deadline, uint, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(eh_deadline, + "SCSI EH timeout in seconds (should be between 1 and 2^32-1)"); + static struct device_type scsi_host_type = { .name = "scsi_host", .release = scsi_host_dev_release, @@ -388,6 +394,7 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize) shost->unchecked_isa_dma = sht->unchecked_isa_dma; shost->use_clustering = sht->use_clustering; shost->ordered_tag = sht->ordered_tag; + shost->eh_deadline = shost_eh_deadline * HZ; if (sht->supported_mode == MODE_UNKNOWN) /* means we didn't set it ... default to INITIATOR */ diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 83e591b60193..edae9e20f886 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -87,6 +87,18 @@ void scsi_schedule_eh(struct Scsi_Host *shost) } EXPORT_SYMBOL_GPL(scsi_schedule_eh); +static int scsi_host_eh_past_deadline(struct Scsi_Host *shost) +{ + if (!shost->last_reset || !shost->eh_deadline) + return 0; + + if (time_before(jiffies, + shost->last_reset + shost->eh_deadline)) + return 0; + + return 1; +} + /** * scsi_eh_scmd_add - add scsi cmd to error handling. * @scmd: scmd to run eh on. @@ -109,6 +121,9 @@ int scsi_eh_scmd_add(struct scsi_cmnd *scmd, int eh_flag) if (scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY)) goto out_unlock; + if (shost->eh_deadline && !shost->last_reset) + shost->last_reset = jiffies; + ret = 1; scmd->eh_eflags |= eh_flag; list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q); @@ -138,6 +153,9 @@ enum blk_eh_timer_return scsi_times_out(struct request *req) trace_scsi_dispatch_cmd_timeout(scmd); scsi_log_completion(scmd, TIMEOUT_ERROR); + if (host->eh_deadline && !host->last_reset) + host->last_reset = jiffies; + if (host->transportt->eh_timed_out) rtn = host->transportt->eh_timed_out(scmd); else if (host->hostt->eh_timed_out) @@ -990,13 +1008,26 @@ int scsi_eh_get_sense(struct list_head *work_q, struct list_head *done_q) { struct scsi_cmnd *scmd, *next; + struct Scsi_Host *shost; int rtn; + unsigned long flags; list_for_each_entry_safe(scmd, next, work_q, eh_entry) { if ((scmd->eh_eflags & SCSI_EH_CANCEL_CMD) || SCSI_SENSE_VALID(scmd)) continue; + shost = scmd->device->host; + spin_lock_irqsave(shost->host_lock, flags); + if (scsi_host_eh_past_deadline(shost)) { + spin_unlock_irqrestore(shost->host_lock, flags); + SCSI_LOG_ERROR_RECOVERY(3, + shost_printk(KERN_INFO, shost, + "skip %s, past eh deadline\n", + __func__)); + break; + } + spin_unlock_irqrestore(shost->host_lock, flags); SCSI_LOG_ERROR_RECOVERY(2, scmd_printk(KERN_INFO, scmd, "%s: requesting sense\n", current->comm)); @@ -1082,11 +1113,28 @@ static int scsi_eh_test_devices(struct list_head *cmd_list, struct scsi_cmnd *scmd, *next; struct scsi_device *sdev; int finish_cmds; + unsigned long flags; while (!list_empty(cmd_list)) { scmd = list_entry(cmd_list->next, struct scsi_cmnd, eh_entry); sdev = scmd->device; + if (!try_stu) { + spin_lock_irqsave(sdev->host->host_lock, flags); + if (scsi_host_eh_past_deadline(sdev->host)) { + /* Push items back onto work_q */ + list_splice_init(cmd_list, work_q); + spin_unlock_irqrestore(sdev->host->host_lock, + flags); + SCSI_LOG_ERROR_RECOVERY(3, + shost_printk(KERN_INFO, sdev->host, + "skip %s, past eh deadline", + __func__)); + break; + } + spin_unlock_irqrestore(sdev->host->host_lock, flags); + } + finish_cmds = !scsi_device_online(scmd->device) || (try_stu && !scsi_eh_try_stu(scmd) && !scsi_eh_tur(scmd)) || @@ -1122,14 +1170,28 @@ static int scsi_eh_abort_cmds(struct list_head *work_q, struct scsi_cmnd *scmd, *next; LIST_HEAD(check_list); int rtn; + struct Scsi_Host *shost; + unsigned long flags; list_for_each_entry_safe(scmd, next, work_q, eh_entry) { if (!(scmd->eh_eflags & SCSI_EH_CANCEL_CMD)) continue; + shost = scmd->device->host; + spin_lock_irqsave(shost->host_lock, flags); + if (scsi_host_eh_past_deadline(shost)) { + spin_unlock_irqrestore(shost->host_lock, flags); + list_splice_init(&check_list, work_q); + SCSI_LOG_ERROR_RECOVERY(3, + shost_printk(KERN_INFO, shost, + "skip %s, past eh deadline\n", + __func__)); + return list_empty(work_q); + } + spin_unlock_irqrestore(shost->host_lock, flags); SCSI_LOG_ERROR_RECOVERY(3, printk("%s: aborting cmd:" "0x%p\n", current->comm, scmd)); - rtn = scsi_try_to_abort_cmd(scmd->device->host->hostt, scmd); + rtn = scsi_try_to_abort_cmd(shost->hostt, scmd); if (rtn == SUCCESS || rtn == FAST_IO_FAIL) { scmd->eh_eflags &= ~SCSI_EH_CANCEL_CMD; if (rtn == FAST_IO_FAIL) @@ -1187,8 +1249,19 @@ static int scsi_eh_stu(struct Scsi_Host *shost, { struct scsi_cmnd *scmd, *stu_scmd, *next; struct scsi_device *sdev; + unsigned long flags; shost_for_each_device(sdev, shost) { + spin_lock_irqsave(shost->host_lock, flags); + if (scsi_host_eh_past_deadline(shost)) { + spin_unlock_irqrestore(shost->host_lock, flags); + SCSI_LOG_ERROR_RECOVERY(3, + shost_printk(KERN_INFO, shost, + "skip %s, past eh deadline\n", + __func__)); + break; + } + spin_unlock_irqrestore(shost->host_lock, flags); stu_scmd = NULL; list_for_each_entry(scmd, work_q, eh_entry) if (scmd->device == sdev && SCSI_SENSE_VALID(scmd) && @@ -1241,9 +1314,20 @@ static int scsi_eh_bus_device_reset(struct Scsi_Host *shost, { struct scsi_cmnd *scmd, *bdr_scmd, *next; struct scsi_device *sdev; + unsigned long flags; int rtn; shost_for_each_device(sdev, shost) { + spin_lock_irqsave(shost->host_lock, flags); + if (scsi_host_eh_past_deadline(shost)) { + spin_unlock_irqrestore(shost->host_lock, flags); + SCSI_LOG_ERROR_RECOVERY(3, + shost_printk(KERN_INFO, shost, + "skip %s, past eh deadline\n", + __func__)); + break; + } + spin_unlock_irqrestore(shost->host_lock, flags); bdr_scmd = NULL; list_for_each_entry(scmd, work_q, eh_entry) if (scmd->device == sdev) { @@ -1303,6 +1387,21 @@ static int scsi_eh_target_reset(struct Scsi_Host *shost, struct scsi_cmnd *next, *scmd; int rtn; unsigned int id; + unsigned long flags; + + spin_lock_irqsave(shost->host_lock, flags); + if (scsi_host_eh_past_deadline(shost)) { + spin_unlock_irqrestore(shost->host_lock, flags); + /* push back on work queue for further processing */ + list_splice_init(&check_list, work_q); + list_splice_init(&tmp_list, work_q); + SCSI_LOG_ERROR_RECOVERY(3, + shost_printk(KERN_INFO, shost, + "skip %s, past eh deadline\n", + __func__)); + return list_empty(work_q); + } + spin_unlock_irqrestore(shost->host_lock, flags); scmd = list_entry(tmp_list.next, struct scsi_cmnd, eh_entry); id = scmd_id(scmd); @@ -1347,6 +1446,7 @@ static int scsi_eh_bus_reset(struct Scsi_Host *shost, LIST_HEAD(check_list); unsigned int channel; int rtn; + unsigned long flags; /* * we really want to loop over the various channels, and do this on @@ -1356,6 +1456,18 @@ static int scsi_eh_bus_reset(struct Scsi_Host *shost, */ for (channel = 0; channel <= shost->max_channel; channel++) { + spin_lock_irqsave(shost->host_lock, flags); + if (scsi_host_eh_past_deadline(shost)) { + spin_unlock_irqrestore(shost->host_lock, flags); + list_splice_init(&check_list, work_q); + SCSI_LOG_ERROR_RECOVERY(3, + shost_printk(KERN_INFO, shost, + "skip %s, past eh deadline\n", + __func__)); + return list_empty(work_q); + } + spin_unlock_irqrestore(shost->host_lock, flags); + chan_scmd = NULL; list_for_each_entry(scmd, work_q, eh_entry) { if (channel == scmd_channel(scmd)) { @@ -1755,8 +1867,9 @@ static void scsi_restart_operations(struct Scsi_Host *shost) * will be requests for character device operations, and also for * ioctls to queued block devices. */ - SCSI_LOG_ERROR_RECOVERY(3, printk("%s: waking up host to restart\n", - __func__)); + SCSI_LOG_ERROR_RECOVERY(3, + printk("scsi_eh_%d waking up host to restart\n", + shost->host_no)); spin_lock_irqsave(shost->host_lock, flags); if (scsi_host_set_state(shost, SHOST_RUNNING)) @@ -1883,6 +1996,10 @@ static void scsi_unjam_host(struct Scsi_Host *shost) if (!scsi_eh_abort_cmds(&eh_work_q, &eh_done_q)) scsi_eh_ready_devs(shost, &eh_work_q, &eh_done_q); + spin_lock_irqsave(shost->host_lock, flags); + if (shost->eh_deadline) + shost->last_reset = 0; + spin_unlock_irqrestore(shost->host_lock, flags); scsi_eh_flush_done_q(&eh_done_q); } @@ -1909,7 +2026,7 @@ int scsi_error_handler(void *data) if ((shost->host_failed == 0 && shost->host_eh_scheduled == 0) || shost->host_failed != shost->host_busy) { SCSI_LOG_ERROR_RECOVERY(1, - printk("Error handler scsi_eh_%d sleeping\n", + printk("scsi_eh_%d: sleeping\n", shost->host_no)); schedule(); continue; @@ -1917,8 +2034,9 @@ int scsi_error_handler(void *data) __set_current_state(TASK_RUNNING); SCSI_LOG_ERROR_RECOVERY(1, - printk("Error handler scsi_eh_%d waking up\n", - shost->host_no)); + printk("scsi_eh_%d: waking up %d/%d/%d\n", + shost->host_no, shost->host_eh_scheduled, + shost->host_failed, shost->host_busy)); /* * We have a host that is failing for some reason. Figure out diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index a73471074a02..8ff62c26a41c 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -281,6 +281,42 @@ exit_store_host_reset: static DEVICE_ATTR(host_reset, S_IWUSR, NULL, store_host_reset); +static ssize_t +show_shost_eh_deadline(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct Scsi_Host *shost = class_to_shost(dev); + + return sprintf(buf, "%d\n", shost->eh_deadline / HZ); +} + +static ssize_t +store_shost_eh_deadline(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct Scsi_Host *shost = class_to_shost(dev); + int ret = -EINVAL; + int deadline; + unsigned long flags; + + if (shost->transportt && shost->transportt->eh_strategy_handler) + return ret; + + if (sscanf(buf, "%d\n", &deadline) == 1) { + spin_lock_irqsave(shost->host_lock, flags); + if (scsi_host_in_recovery(shost)) + ret = -EBUSY; + else { + shost->eh_deadline = deadline * HZ; + ret = count; + } + spin_unlock_irqrestore(shost->host_lock, flags); + } + return ret; +} + +static DEVICE_ATTR(eh_deadline, S_IRUGO | S_IWUSR, show_shost_eh_deadline, store_shost_eh_deadline); + shost_rd_attr(unique_id, "%u\n"); shost_rd_attr(host_busy, "%hu\n"); shost_rd_attr(cmd_per_lun, "%hd\n"); @@ -308,6 +344,7 @@ static struct attribute *scsi_sysfs_shost_attrs[] = { &dev_attr_prot_capabilities.attr, &dev_attr_prot_guard_type.attr, &dev_attr_host_reset.attr, + &dev_attr_eh_deadline.attr, NULL }; diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index a74b7d9afe8e..546084964d55 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -599,6 +599,11 @@ struct Scsi_Host { unsigned int host_no; /* Used for IOCTL_GET_IDLUN, /proc/scsi et al. */ + /* next two fields are used to bound the time spent in error handling */ + int eh_deadline; + unsigned long last_reset; + + /* * These three parameters can be used to allow for wide scsi, * and for host adapters that support multiple busses -- cgit v1.2.3 From 9dd69a600a680fab1c9235a644c886d8d6a2da2a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sat, 26 Oct 2013 14:32:30 +0200 Subject: IB/srp: Keep rport as long as the IB transport layer Keep the rport data structure around after srp_remove_host() has finished until cleanup of the IB transport layer has finished completely. This is necessary because later patches use the rport pointer inside the queuecommand callback. Without this patch accessing the rport from inside a queuecommand callback is racy because srp_remove_host() must be invoked before scsi_remove_host() and because the queuecommand callback could get invoked after srp_remove_host() has finished. In other words, without this patch the queuecommand callback can get invoked after the rport data structure has been freed. Signed-off-by: Bart Van Assche Acked-by: David Dillow Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 3 +++ drivers/infiniband/ulp/srp/ib_srp.h | 1 + drivers/scsi/scsi_transport_srp.c | 18 ++++++++++++++++++ include/scsi/scsi_transport_srp.h | 2 ++ 4 files changed, 24 insertions(+) (limited to 'include/scsi') diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 11ebf3e7646a..6edab7855f5e 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -528,11 +528,13 @@ static void srp_remove_target(struct srp_target_port *target) WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED); srp_del_scsi_host_attr(target->scsi_host); + srp_rport_get(target->rport); srp_remove_host(target->scsi_host); scsi_remove_host(target->scsi_host); srp_disconnect_target(target); ib_destroy_cm_id(target->cm_id); srp_free_target_ib(target); + srp_rport_put(target->rport); srp_free_req_data(target); scsi_host_put(target->scsi_host); } @@ -2004,6 +2006,7 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target) } rport->lld_data = target; + target->rport = rport; spin_lock(&host->target_lock); list_add_tail(&target->list, &host->target_list); diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index 84d821b24ec7..2a1768fcc57d 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -153,6 +153,7 @@ struct srp_target_port { u16 io_class; struct srp_host *srp_host; struct Scsi_Host *scsi_host; + struct srp_rport *rport; char target_name[32]; unsigned int scsi_id; unsigned int sg_tablesize; diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c index f379c7f3034c..f7ba94aa52cb 100644 --- a/drivers/scsi/scsi_transport_srp.c +++ b/drivers/scsi/scsi_transport_srp.c @@ -184,6 +184,24 @@ static int srp_host_match(struct attribute_container *cont, struct device *dev) return &i->t.host_attrs.ac == cont; } +/** + * srp_rport_get() - increment rport reference count + */ +void srp_rport_get(struct srp_rport *rport) +{ + get_device(&rport->dev); +} +EXPORT_SYMBOL(srp_rport_get); + +/** + * srp_rport_put() - decrement rport reference count + */ +void srp_rport_put(struct srp_rport *rport) +{ + put_device(&rport->dev); +} +EXPORT_SYMBOL(srp_rport_put); + /** * srp_rport_add - add a SRP remote port to the device hierarchy * @shost: scsi host the remote port is connected to. diff --git a/include/scsi/scsi_transport_srp.h b/include/scsi/scsi_transport_srp.h index ff0f04ac91aa..5a2d2d1081c1 100644 --- a/include/scsi/scsi_transport_srp.h +++ b/include/scsi/scsi_transport_srp.h @@ -38,6 +38,8 @@ extern struct scsi_transport_template * srp_attach_transport(struct srp_function_template *); extern void srp_release_transport(struct scsi_transport_template *); +extern void srp_rport_get(struct srp_rport *rport); +extern void srp_rport_put(struct srp_rport *rport); extern struct srp_rport *srp_rport_add(struct Scsi_Host *, struct srp_rport_identifiers *); extern void srp_rport_del(struct srp_rport *); -- cgit v1.2.3 From 29c17324803c8a3bb5b2b69309e43571164cc4de Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sat, 26 Oct 2013 14:33:30 +0200 Subject: scsi_transport_srp: Add transport layer error handling Add the necessary functions in the SRP transport module to allow an SRP initiator driver to implement transport layer error handling similar to the functionality already provided by the FC transport layer. This includes: - Support for implementing fast_io_fail_tmo, the time that should elapse after having detected a transport layer problem and before failing I/O. - Support for implementing dev_loss_tmo, the time that should elapse after having detected a transport layer problem and before removing a remote port. Signed-off-by: Bart Van Assche Acked-by: David Dillow Signed-off-by: Roland Dreier --- Documentation/ABI/stable/sysfs-transport-srp | 31 ++ drivers/scsi/scsi_transport_srp.c | 430 ++++++++++++++++++++++++++- include/scsi/scsi_transport_srp.h | 74 ++++- 3 files changed, 532 insertions(+), 3 deletions(-) (limited to 'include/scsi') diff --git a/Documentation/ABI/stable/sysfs-transport-srp b/Documentation/ABI/stable/sysfs-transport-srp index b36fb0dc13c8..8b6acc775b71 100644 --- a/Documentation/ABI/stable/sysfs-transport-srp +++ b/Documentation/ABI/stable/sysfs-transport-srp @@ -5,6 +5,24 @@ Contact: linux-scsi@vger.kernel.org, linux-rdma@vger.kernel.org Description: Instructs an SRP initiator to disconnect from a target and to remove all LUNs imported from that target. +What: /sys/class/srp_remote_ports/port-:/dev_loss_tmo +Date: February 1, 2014 +KernelVersion: 3.13 +Contact: linux-scsi@vger.kernel.org, linux-rdma@vger.kernel.org +Description: Number of seconds the SCSI layer will wait after a transport + layer error has been observed before removing a target port. + Zero means immediate removal. Setting this attribute to "off" + will disable the dev_loss timer. + +What: /sys/class/srp_remote_ports/port-:/fast_io_fail_tmo +Date: February 1, 2014 +KernelVersion: 3.13 +Contact: linux-scsi@vger.kernel.org, linux-rdma@vger.kernel.org +Description: Number of seconds the SCSI layer will wait after a transport + layer error has been observed before failing I/O. Zero means + failing I/O immediately. Setting this attribute to "off" will + disable the fast_io_fail timer. + What: /sys/class/srp_remote_ports/port-:/port_id Date: June 27, 2007 KernelVersion: 2.6.24 @@ -17,3 +35,16 @@ Date: June 27, 2007 KernelVersion: 2.6.24 Contact: linux-scsi@vger.kernel.org Description: Role of the remote port. Either "SRP Initiator" or "SRP Target". + +What: /sys/class/srp_remote_ports/port-:/state +Date: February 1, 2014 +KernelVersion: 3.13 +Contact: linux-scsi@vger.kernel.org, linux-rdma@vger.kernel.org +Description: State of the transport layer used for communication with the + remote port. "running" if the transport layer is operational; + "blocked" if a transport layer error has been encountered but + the fast_io_fail_tmo timer has not yet fired; "fail-fast" + after the fast_io_fail_tmo timer has fired and before the + "dev_loss_tmo" timer has fired; "lost" after the + "dev_loss_tmo" timer has fired and before the port is finally + removed. diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c index f7ba94aa52cb..2696e26b3423 100644 --- a/drivers/scsi/scsi_transport_srp.c +++ b/drivers/scsi/scsi_transport_srp.c @@ -24,12 +24,15 @@ #include #include #include +#include #include +#include #include #include #include #include +#include "scsi_priv.h" #include "scsi_transport_srp_internal.h" struct srp_host_attrs { @@ -38,7 +41,7 @@ struct srp_host_attrs { #define to_srp_host_attrs(host) ((struct srp_host_attrs *)(host)->shost_data) #define SRP_HOST_ATTRS 0 -#define SRP_RPORT_ATTRS 3 +#define SRP_RPORT_ATTRS 6 struct srp_internal { struct scsi_transport_template t; @@ -54,6 +57,34 @@ struct srp_internal { #define dev_to_rport(d) container_of(d, struct srp_rport, dev) #define transport_class_to_srp_rport(dev) dev_to_rport((dev)->parent) +static inline struct Scsi_Host *rport_to_shost(struct srp_rport *r) +{ + return dev_to_shost(r->dev.parent); +} + +/** + * srp_tmo_valid() - check timeout combination validity + * + * The combination of the timeout parameters must be such that SCSI commands + * are finished in a reasonable time. Hence do not allow the fast I/O fail + * timeout to exceed SCSI_DEVICE_BLOCK_MAX_TIMEOUT. Furthermore, these + * parameters must be such that multipath can detect failed paths timely. + * Hence do not allow both parameters to be disabled simultaneously. + */ +int srp_tmo_valid(int fast_io_fail_tmo, int dev_loss_tmo) +{ + if (fast_io_fail_tmo < 0 && dev_loss_tmo < 0) + return -EINVAL; + if (fast_io_fail_tmo > SCSI_DEVICE_BLOCK_MAX_TIMEOUT) + return -EINVAL; + if (dev_loss_tmo >= LONG_MAX / HZ) + return -EINVAL; + if (fast_io_fail_tmo >= 0 && dev_loss_tmo >= 0 && + fast_io_fail_tmo >= dev_loss_tmo) + return -EINVAL; + return 0; +} +EXPORT_SYMBOL_GPL(srp_tmo_valid); static int srp_host_setup(struct transport_container *tc, struct device *dev, struct device *cdev) @@ -134,10 +165,383 @@ static ssize_t store_srp_rport_delete(struct device *dev, static DEVICE_ATTR(delete, S_IWUSR, NULL, store_srp_rport_delete); +static ssize_t show_srp_rport_state(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + static const char *const state_name[] = { + [SRP_RPORT_RUNNING] = "running", + [SRP_RPORT_BLOCKED] = "blocked", + [SRP_RPORT_FAIL_FAST] = "fail-fast", + [SRP_RPORT_LOST] = "lost", + }; + struct srp_rport *rport = transport_class_to_srp_rport(dev); + enum srp_rport_state state = rport->state; + + return sprintf(buf, "%s\n", + (unsigned)state < ARRAY_SIZE(state_name) ? + state_name[state] : "???"); +} + +static DEVICE_ATTR(state, S_IRUGO, show_srp_rport_state, NULL); + +static ssize_t srp_show_tmo(char *buf, int tmo) +{ + return tmo >= 0 ? sprintf(buf, "%d\n", tmo) : sprintf(buf, "off\n"); +} + +static int srp_parse_tmo(int *tmo, const char *buf) +{ + int res = 0; + + if (strncmp(buf, "off", 3) != 0) + res = kstrtoint(buf, 0, tmo); + else + *tmo = -1; + + return res; +} + +static ssize_t show_srp_rport_fast_io_fail_tmo(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct srp_rport *rport = transport_class_to_srp_rport(dev); + + return srp_show_tmo(buf, rport->fast_io_fail_tmo); +} + +static ssize_t store_srp_rport_fast_io_fail_tmo(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct srp_rport *rport = transport_class_to_srp_rport(dev); + int res; + int fast_io_fail_tmo; + + res = srp_parse_tmo(&fast_io_fail_tmo, buf); + if (res) + goto out; + res = srp_tmo_valid(fast_io_fail_tmo, rport->dev_loss_tmo); + if (res) + goto out; + rport->fast_io_fail_tmo = fast_io_fail_tmo; + res = count; + +out: + return res; +} + +static DEVICE_ATTR(fast_io_fail_tmo, S_IRUGO | S_IWUSR, + show_srp_rport_fast_io_fail_tmo, + store_srp_rport_fast_io_fail_tmo); + +static ssize_t show_srp_rport_dev_loss_tmo(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct srp_rport *rport = transport_class_to_srp_rport(dev); + + return srp_show_tmo(buf, rport->dev_loss_tmo); +} + +static ssize_t store_srp_rport_dev_loss_tmo(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct srp_rport *rport = transport_class_to_srp_rport(dev); + int res; + int dev_loss_tmo; + + res = srp_parse_tmo(&dev_loss_tmo, buf); + if (res) + goto out; + res = srp_tmo_valid(rport->fast_io_fail_tmo, dev_loss_tmo); + if (res) + goto out; + rport->dev_loss_tmo = dev_loss_tmo; + res = count; + +out: + return res; +} + +static DEVICE_ATTR(dev_loss_tmo, S_IRUGO | S_IWUSR, + show_srp_rport_dev_loss_tmo, + store_srp_rport_dev_loss_tmo); + +static int srp_rport_set_state(struct srp_rport *rport, + enum srp_rport_state new_state) +{ + enum srp_rport_state old_state = rport->state; + + lockdep_assert_held(&rport->mutex); + + switch (new_state) { + case SRP_RPORT_RUNNING: + switch (old_state) { + case SRP_RPORT_LOST: + goto invalid; + default: + break; + } + break; + case SRP_RPORT_BLOCKED: + switch (old_state) { + case SRP_RPORT_RUNNING: + break; + default: + goto invalid; + } + break; + case SRP_RPORT_FAIL_FAST: + switch (old_state) { + case SRP_RPORT_LOST: + goto invalid; + default: + break; + } + break; + case SRP_RPORT_LOST: + break; + } + rport->state = new_state; + return 0; + +invalid: + return -EINVAL; +} + +static void __rport_fail_io_fast(struct srp_rport *rport) +{ + struct Scsi_Host *shost = rport_to_shost(rport); + struct srp_internal *i; + + lockdep_assert_held(&rport->mutex); + + if (srp_rport_set_state(rport, SRP_RPORT_FAIL_FAST)) + return; + scsi_target_unblock(rport->dev.parent, SDEV_TRANSPORT_OFFLINE); + + /* Involve the LLD if possible to terminate all I/O on the rport. */ + i = to_srp_internal(shost->transportt); + if (i->f->terminate_rport_io) + i->f->terminate_rport_io(rport); +} + +/** + * rport_fast_io_fail_timedout() - fast I/O failure timeout handler + */ +static void rport_fast_io_fail_timedout(struct work_struct *work) +{ + struct srp_rport *rport = container_of(to_delayed_work(work), + struct srp_rport, fast_io_fail_work); + struct Scsi_Host *shost = rport_to_shost(rport); + + pr_info("fast_io_fail_tmo expired for SRP %s / %s.\n", + dev_name(&rport->dev), dev_name(&shost->shost_gendev)); + + mutex_lock(&rport->mutex); + if (rport->state == SRP_RPORT_BLOCKED) + __rport_fail_io_fast(rport); + mutex_unlock(&rport->mutex); +} + +/** + * rport_dev_loss_timedout() - device loss timeout handler + */ +static void rport_dev_loss_timedout(struct work_struct *work) +{ + struct srp_rport *rport = container_of(to_delayed_work(work), + struct srp_rport, dev_loss_work); + struct Scsi_Host *shost = rport_to_shost(rport); + struct srp_internal *i = to_srp_internal(shost->transportt); + + pr_info("dev_loss_tmo expired for SRP %s / %s.\n", + dev_name(&rport->dev), dev_name(&shost->shost_gendev)); + + mutex_lock(&rport->mutex); + WARN_ON(srp_rport_set_state(rport, SRP_RPORT_LOST) != 0); + scsi_target_unblock(rport->dev.parent, SDEV_TRANSPORT_OFFLINE); + mutex_unlock(&rport->mutex); + + i->f->rport_delete(rport); +} + +static void __srp_start_tl_fail_timers(struct srp_rport *rport) +{ + struct Scsi_Host *shost = rport_to_shost(rport); + int fast_io_fail_tmo, dev_loss_tmo; + + lockdep_assert_held(&rport->mutex); + + if (!rport->deleted) { + fast_io_fail_tmo = rport->fast_io_fail_tmo; + dev_loss_tmo = rport->dev_loss_tmo; + pr_debug("%s current state: %d\n", + dev_name(&shost->shost_gendev), rport->state); + + if (fast_io_fail_tmo >= 0 && + srp_rport_set_state(rport, SRP_RPORT_BLOCKED) == 0) { + pr_debug("%s new state: %d\n", + dev_name(&shost->shost_gendev), + rport->state); + scsi_target_block(&shost->shost_gendev); + queue_delayed_work(system_long_wq, + &rport->fast_io_fail_work, + 1UL * fast_io_fail_tmo * HZ); + } + if (dev_loss_tmo >= 0) + queue_delayed_work(system_long_wq, + &rport->dev_loss_work, + 1UL * dev_loss_tmo * HZ); + } else { + pr_debug("%s has already been deleted\n", + dev_name(&shost->shost_gendev)); + srp_rport_set_state(rport, SRP_RPORT_FAIL_FAST); + scsi_target_unblock(&shost->shost_gendev, + SDEV_TRANSPORT_OFFLINE); + } +} + +/** + * srp_start_tl_fail_timers() - start the transport layer failure timers + * + * Start the transport layer fast I/O failure and device loss timers. Do not + * modify a timer that was already started. + */ +void srp_start_tl_fail_timers(struct srp_rport *rport) +{ + mutex_lock(&rport->mutex); + __srp_start_tl_fail_timers(rport); + mutex_unlock(&rport->mutex); +} +EXPORT_SYMBOL(srp_start_tl_fail_timers); + +/** + * scsi_request_fn_active() - number of kernel threads inside scsi_request_fn() + */ +static int scsi_request_fn_active(struct Scsi_Host *shost) +{ + struct scsi_device *sdev; + struct request_queue *q; + int request_fn_active = 0; + + shost_for_each_device(sdev, shost) { + q = sdev->request_queue; + + spin_lock_irq(q->queue_lock); + request_fn_active += q->request_fn_active; + spin_unlock_irq(q->queue_lock); + } + + return request_fn_active; +} + +/** + * srp_reconnect_rport() - reconnect to an SRP target port + * + * Blocks SCSI command queueing before invoking reconnect() such that + * queuecommand() won't be invoked concurrently with reconnect() from outside + * the SCSI EH. This is important since a reconnect() implementation may + * reallocate resources needed by queuecommand(). + * + * Notes: + * - This function neither waits until outstanding requests have finished nor + * tries to abort these. It is the responsibility of the reconnect() + * function to finish outstanding commands before reconnecting to the target + * port. + * - It is the responsibility of the caller to ensure that the resources + * reallocated by the reconnect() function won't be used while this function + * is in progress. One possible strategy is to invoke this function from + * the context of the SCSI EH thread only. Another possible strategy is to + * lock the rport mutex inside each SCSI LLD callback that can be invoked by + * the SCSI EH (the scsi_host_template.eh_*() functions and also the + * scsi_host_template.queuecommand() function). + */ +int srp_reconnect_rport(struct srp_rport *rport) +{ + struct Scsi_Host *shost = rport_to_shost(rport); + struct srp_internal *i = to_srp_internal(shost->transportt); + struct scsi_device *sdev; + int res; + + pr_debug("SCSI host %s\n", dev_name(&shost->shost_gendev)); + + res = mutex_lock_interruptible(&rport->mutex); + if (res) + goto out; + scsi_target_block(&shost->shost_gendev); + while (scsi_request_fn_active(shost)) + msleep(20); + res = i->f->reconnect(rport); + pr_debug("%s (state %d): transport.reconnect() returned %d\n", + dev_name(&shost->shost_gendev), rport->state, res); + if (res == 0) { + cancel_delayed_work(&rport->fast_io_fail_work); + cancel_delayed_work(&rport->dev_loss_work); + + srp_rport_set_state(rport, SRP_RPORT_RUNNING); + scsi_target_unblock(&shost->shost_gendev, SDEV_RUNNING); + /* + * If the SCSI error handler has offlined one or more devices, + * invoking scsi_target_unblock() won't change the state of + * these devices into running so do that explicitly. + */ + spin_lock_irq(shost->host_lock); + __shost_for_each_device(sdev, shost) + if (sdev->sdev_state == SDEV_OFFLINE) + sdev->sdev_state = SDEV_RUNNING; + spin_unlock_irq(shost->host_lock); + } else if (rport->state == SRP_RPORT_RUNNING) { + /* + * srp_reconnect_rport() was invoked with fast_io_fail + * off. Mark the port as failed and start the TL failure + * timers if these had not yet been started. + */ + __rport_fail_io_fast(rport); + scsi_target_unblock(&shost->shost_gendev, + SDEV_TRANSPORT_OFFLINE); + __srp_start_tl_fail_timers(rport); + } else if (rport->state != SRP_RPORT_BLOCKED) { + scsi_target_unblock(&shost->shost_gendev, + SDEV_TRANSPORT_OFFLINE); + } + mutex_unlock(&rport->mutex); + +out: + return res; +} +EXPORT_SYMBOL(srp_reconnect_rport); + +/** + * srp_timed_out() - SRP transport intercept of the SCSI timeout EH + * + * If a timeout occurs while an rport is in the blocked state, ask the SCSI + * EH to continue waiting (BLK_EH_RESET_TIMER). Otherwise let the SCSI core + * handle the timeout (BLK_EH_NOT_HANDLED). + * + * Note: This function is called from soft-IRQ context and with the request + * queue lock held. + */ +static enum blk_eh_timer_return srp_timed_out(struct scsi_cmnd *scmd) +{ + struct scsi_device *sdev = scmd->device; + struct Scsi_Host *shost = sdev->host; + struct srp_internal *i = to_srp_internal(shost->transportt); + + pr_debug("timeout for sdev %s\n", dev_name(&sdev->sdev_gendev)); + return i->f->reset_timer_if_blocked && scsi_device_blocked(sdev) ? + BLK_EH_RESET_TIMER : BLK_EH_NOT_HANDLED; +} + static void srp_rport_release(struct device *dev) { struct srp_rport *rport = dev_to_rport(dev); + cancel_delayed_work_sync(&rport->fast_io_fail_work); + cancel_delayed_work_sync(&rport->dev_loss_work); + put_device(dev->parent); kfree(rport); } @@ -214,12 +618,15 @@ struct srp_rport *srp_rport_add(struct Scsi_Host *shost, { struct srp_rport *rport; struct device *parent = &shost->shost_gendev; + struct srp_internal *i = to_srp_internal(shost->transportt); int id, ret; rport = kzalloc(sizeof(*rport), GFP_KERNEL); if (!rport) return ERR_PTR(-ENOMEM); + mutex_init(&rport->mutex); + device_initialize(&rport->dev); rport->dev.parent = get_device(parent); @@ -228,6 +635,13 @@ struct srp_rport *srp_rport_add(struct Scsi_Host *shost, memcpy(rport->port_id, ids->port_id, sizeof(rport->port_id)); rport->roles = ids->roles; + rport->fast_io_fail_tmo = i->f->fast_io_fail_tmo ? + *i->f->fast_io_fail_tmo : 15; + rport->dev_loss_tmo = i->f->dev_loss_tmo ? *i->f->dev_loss_tmo : 60; + INIT_DELAYED_WORK(&rport->fast_io_fail_work, + rport_fast_io_fail_timedout); + INIT_DELAYED_WORK(&rport->dev_loss_work, rport_dev_loss_timedout); + id = atomic_inc_return(&to_srp_host_attrs(shost)->next_port_id); dev_set_name(&rport->dev, "port-%d:%d", shost->host_no, id); @@ -277,6 +691,13 @@ void srp_rport_del(struct srp_rport *rport) transport_remove_device(dev); device_del(dev); transport_destroy_device(dev); + + mutex_lock(&rport->mutex); + if (rport->state == SRP_RPORT_BLOCKED) + __rport_fail_io_fast(rport); + rport->deleted = true; + mutex_unlock(&rport->mutex); + put_device(dev); } EXPORT_SYMBOL_GPL(srp_rport_del); @@ -328,6 +749,8 @@ srp_attach_transport(struct srp_function_template *ft) if (!i) return NULL; + i->t.eh_timed_out = srp_timed_out; + i->t.tsk_mgmt_response = srp_tsk_mgmt_response; i->t.it_nexus_response = srp_it_nexus_response; @@ -345,6 +768,11 @@ srp_attach_transport(struct srp_function_template *ft) count = 0; i->rport_attrs[count++] = &dev_attr_port_id; i->rport_attrs[count++] = &dev_attr_roles; + if (ft->has_rport_state) { + i->rport_attrs[count++] = &dev_attr_state; + i->rport_attrs[count++] = &dev_attr_fast_io_fail_tmo; + i->rport_attrs[count++] = &dev_attr_dev_loss_tmo; + } if (ft->rport_delete) i->rport_attrs[count++] = &dev_attr_delete; i->rport_attrs[count++] = NULL; diff --git a/include/scsi/scsi_transport_srp.h b/include/scsi/scsi_transport_srp.h index 5a2d2d1081c1..ee7001677f64 100644 --- a/include/scsi/scsi_transport_srp.h +++ b/include/scsi/scsi_transport_srp.h @@ -13,6 +13,26 @@ struct srp_rport_identifiers { u8 roles; }; +/** + * enum srp_rport_state - SRP transport layer state + * @SRP_RPORT_RUNNING: Transport layer operational. + * @SRP_RPORT_BLOCKED: Transport layer not operational; fast I/O fail timer + * is running and I/O has been blocked. + * @SRP_RPORT_FAIL_FAST: Fast I/O fail timer has expired; fail I/O fast. + * @SRP_RPORT_LOST: Device loss timer has expired; port is being removed. + */ +enum srp_rport_state { + SRP_RPORT_RUNNING, + SRP_RPORT_BLOCKED, + SRP_RPORT_FAIL_FAST, + SRP_RPORT_LOST, +}; + +/** + * struct srp_rport + * @lld_data: LLD private data. + * @mutex: Protects against concurrent rport fast_io_fail / dev_loss_tmo. + */ struct srp_rport { /* for initiator and target drivers */ @@ -23,11 +43,38 @@ struct srp_rport { /* for initiator drivers */ - void *lld_data; /* LLD private data */ + void *lld_data; + + struct mutex mutex; + enum srp_rport_state state; + bool deleted; + int fast_io_fail_tmo; + int dev_loss_tmo; + struct delayed_work fast_io_fail_work; + struct delayed_work dev_loss_work; }; +/** + * struct srp_function_template + * @has_rport_state: Whether or not to create the state, fast_io_fail_tmo and + * dev_loss_tmo sysfs attribute for an rport. + * @reset_timer_if_blocked: Whether or srp_timed_out() should reset the command + * timer if the device on which it has been queued is blocked. + * @fast_io_fail_tmo: If not NULL, points to the default fast_io_fail_tmo value. + * @dev_loss_tmo: If not NULL, points to the default dev_loss_tmo value. + * @reconnect: Callback function for reconnecting to the target. See also + * srp_reconnect_rport(). + * @terminate_rport_io: Callback function for terminating all outstanding I/O + * requests for an rport. + */ struct srp_function_template { /* for initiator drivers */ + bool has_rport_state; + bool reset_timer_if_blocked; + int *fast_io_fail_tmo; + int *dev_loss_tmo; + int (*reconnect)(struct srp_rport *rport); + void (*terminate_rport_io)(struct srp_rport *rport); void (*rport_delete)(struct srp_rport *rport); /* for target drivers */ int (* tsk_mgmt_response)(struct Scsi_Host *, u64, u64, int); @@ -43,7 +90,30 @@ extern void srp_rport_put(struct srp_rport *rport); extern struct srp_rport *srp_rport_add(struct Scsi_Host *, struct srp_rport_identifiers *); extern void srp_rport_del(struct srp_rport *); - +extern int srp_tmo_valid(int fast_io_fail_tmo, int dev_loss_tmo); +extern int srp_reconnect_rport(struct srp_rport *rport); +extern void srp_start_tl_fail_timers(struct srp_rport *rport); extern void srp_remove_host(struct Scsi_Host *); +/** + * srp_chkready() - evaluate the transport layer state before I/O + * + * Returns a SCSI result code that can be returned by the LLD queuecommand() + * implementation. The role of this function is similar to that of + * fc_remote_port_chkready(). + */ +static inline int srp_chkready(struct srp_rport *rport) +{ + switch (rport->state) { + case SRP_RPORT_RUNNING: + case SRP_RPORT_BLOCKED: + default: + return 0; + case SRP_RPORT_FAIL_FAST: + return DID_TRANSPORT_FAILFAST << 16; + case SRP_RPORT_LOST: + return DID_NO_CONNECT << 16; + } +} + #endif -- cgit v1.2.3 From 8c64e4531c3c3bedf11d723196270d4a7553db45 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sat, 26 Oct 2013 14:35:59 +0200 Subject: scsi_transport_srp: Add periodic reconnect support Add support for periodically reconnecting to an SRP target until the dev_loss timer expires. After the tenth reconnection attempt, gradually slow down subsequent reconnect attempts. Signed-off-by: Bart Van Assche Acked-by: David Dillow Signed-off-by: Roland Dreier --- Documentation/ABI/stable/sysfs-transport-srp | 8 ++ drivers/infiniband/ulp/srp/ib_srp.c | 4 +- drivers/scsi/scsi_transport_srp.c | 106 +++++++++++++++++++++++++-- include/scsi/scsi_transport_srp.h | 11 ++- 4 files changed, 118 insertions(+), 11 deletions(-) (limited to 'include/scsi') diff --git a/Documentation/ABI/stable/sysfs-transport-srp b/Documentation/ABI/stable/sysfs-transport-srp index 8b6acc775b71..ec7af69fea0a 100644 --- a/Documentation/ABI/stable/sysfs-transport-srp +++ b/Documentation/ABI/stable/sysfs-transport-srp @@ -30,6 +30,14 @@ Contact: linux-scsi@vger.kernel.org Description: 16-byte local SRP port identifier in hexadecimal format. An example: 4c:49:4e:55:58:20:56:49:4f:00:00:00:00:00:00:00. +What: /sys/class/srp_remote_ports/port-:/reconnect_delay +Date: February 1, 2014 +KernelVersion: 3.13 +Contact: linux-scsi@vger.kernel.org, linux-rdma@vger.kernel.org +Description: Number of seconds the SCSI layer will wait after a reconnect + attempt failed before retrying. Setting this attribute to + "off" will disable time-based reconnecting. + What: /sys/class/srp_remote_ports/port-:/roles Date: June 27, 2007 KernelVersion: 2.6.24 diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 076a3ab7f779..99c893d1c2ac 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -145,9 +145,9 @@ static int srp_tmo_set(const char *val, const struct kernel_param *kp) tmo = -1; } if (kp->arg == &srp_fast_io_fail_tmo) - res = srp_tmo_valid(tmo, srp_dev_loss_tmo); + res = srp_tmo_valid(-1, tmo, srp_dev_loss_tmo); else - res = srp_tmo_valid(srp_fast_io_fail_tmo, tmo); + res = srp_tmo_valid(-1, srp_fast_io_fail_tmo, tmo); if (res) goto out; *(int *)kp->arg = tmo; diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c index 2696e26b3423..2700a5a09bd4 100644 --- a/drivers/scsi/scsi_transport_srp.c +++ b/drivers/scsi/scsi_transport_srp.c @@ -41,7 +41,7 @@ struct srp_host_attrs { #define to_srp_host_attrs(host) ((struct srp_host_attrs *)(host)->shost_data) #define SRP_HOST_ATTRS 0 -#define SRP_RPORT_ATTRS 6 +#define SRP_RPORT_ATTRS 8 struct srp_internal { struct scsi_transport_template t; @@ -69,11 +69,13 @@ static inline struct Scsi_Host *rport_to_shost(struct srp_rport *r) * are finished in a reasonable time. Hence do not allow the fast I/O fail * timeout to exceed SCSI_DEVICE_BLOCK_MAX_TIMEOUT. Furthermore, these * parameters must be such that multipath can detect failed paths timely. - * Hence do not allow both parameters to be disabled simultaneously. + * Hence do not allow all three parameters to be disabled simultaneously. */ -int srp_tmo_valid(int fast_io_fail_tmo, int dev_loss_tmo) +int srp_tmo_valid(int reconnect_delay, int fast_io_fail_tmo, int dev_loss_tmo) { - if (fast_io_fail_tmo < 0 && dev_loss_tmo < 0) + if (reconnect_delay < 0 && fast_io_fail_tmo < 0 && dev_loss_tmo < 0) + return -EINVAL; + if (reconnect_delay == 0) return -EINVAL; if (fast_io_fail_tmo > SCSI_DEVICE_BLOCK_MAX_TIMEOUT) return -EINVAL; @@ -202,6 +204,56 @@ static int srp_parse_tmo(int *tmo, const char *buf) return res; } +static ssize_t show_reconnect_delay(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct srp_rport *rport = transport_class_to_srp_rport(dev); + + return srp_show_tmo(buf, rport->reconnect_delay); +} + +static ssize_t store_reconnect_delay(struct device *dev, + struct device_attribute *attr, + const char *buf, const size_t count) +{ + struct srp_rport *rport = transport_class_to_srp_rport(dev); + int res, delay; + + res = srp_parse_tmo(&delay, buf); + if (res) + goto out; + res = srp_tmo_valid(delay, rport->fast_io_fail_tmo, + rport->dev_loss_tmo); + if (res) + goto out; + + if (rport->reconnect_delay <= 0 && delay > 0 && + rport->state != SRP_RPORT_RUNNING) { + queue_delayed_work(system_long_wq, &rport->reconnect_work, + delay * HZ); + } else if (delay <= 0) { + cancel_delayed_work(&rport->reconnect_work); + } + rport->reconnect_delay = delay; + res = count; + +out: + return res; +} + +static DEVICE_ATTR(reconnect_delay, S_IRUGO | S_IWUSR, show_reconnect_delay, + store_reconnect_delay); + +static ssize_t show_failed_reconnects(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct srp_rport *rport = transport_class_to_srp_rport(dev); + + return sprintf(buf, "%d\n", rport->failed_reconnects); +} + +static DEVICE_ATTR(failed_reconnects, S_IRUGO, show_failed_reconnects, NULL); + static ssize_t show_srp_rport_fast_io_fail_tmo(struct device *dev, struct device_attribute *attr, char *buf) @@ -222,7 +274,8 @@ static ssize_t store_srp_rport_fast_io_fail_tmo(struct device *dev, res = srp_parse_tmo(&fast_io_fail_tmo, buf); if (res) goto out; - res = srp_tmo_valid(fast_io_fail_tmo, rport->dev_loss_tmo); + res = srp_tmo_valid(rport->reconnect_delay, fast_io_fail_tmo, + rport->dev_loss_tmo); if (res) goto out; rport->fast_io_fail_tmo = fast_io_fail_tmo; @@ -256,7 +309,8 @@ static ssize_t store_srp_rport_dev_loss_tmo(struct device *dev, res = srp_parse_tmo(&dev_loss_tmo, buf); if (res) goto out; - res = srp_tmo_valid(rport->fast_io_fail_tmo, dev_loss_tmo); + res = srp_tmo_valid(rport->reconnect_delay, rport->fast_io_fail_tmo, + dev_loss_tmo); if (res) goto out; rport->dev_loss_tmo = dev_loss_tmo; @@ -312,6 +366,29 @@ invalid: return -EINVAL; } +/** + * srp_reconnect_work() - reconnect and schedule a new attempt if necessary + */ +static void srp_reconnect_work(struct work_struct *work) +{ + struct srp_rport *rport = container_of(to_delayed_work(work), + struct srp_rport, reconnect_work); + struct Scsi_Host *shost = rport_to_shost(rport); + int delay, res; + + res = srp_reconnect_rport(rport); + if (res != 0) { + shost_printk(KERN_ERR, shost, + "reconnect attempt %d failed (%d)\n", + ++rport->failed_reconnects, res); + delay = rport->reconnect_delay * + min(100, max(1, rport->failed_reconnects - 10)); + if (delay > 0) + queue_delayed_work(system_long_wq, + &rport->reconnect_work, delay * HZ); + } +} + static void __rport_fail_io_fast(struct srp_rport *rport) { struct Scsi_Host *shost = rport_to_shost(rport); @@ -371,16 +448,21 @@ static void rport_dev_loss_timedout(struct work_struct *work) static void __srp_start_tl_fail_timers(struct srp_rport *rport) { struct Scsi_Host *shost = rport_to_shost(rport); - int fast_io_fail_tmo, dev_loss_tmo; + int delay, fast_io_fail_tmo, dev_loss_tmo; lockdep_assert_held(&rport->mutex); if (!rport->deleted) { + delay = rport->reconnect_delay; fast_io_fail_tmo = rport->fast_io_fail_tmo; dev_loss_tmo = rport->dev_loss_tmo; pr_debug("%s current state: %d\n", dev_name(&shost->shost_gendev), rport->state); + if (delay > 0) + queue_delayed_work(system_long_wq, + &rport->reconnect_work, + 1UL * delay * HZ); if (fast_io_fail_tmo >= 0 && srp_rport_set_state(rport, SRP_RPORT_BLOCKED) == 0) { pr_debug("%s new state: %d\n", @@ -481,6 +563,7 @@ int srp_reconnect_rport(struct srp_rport *rport) cancel_delayed_work(&rport->fast_io_fail_work); cancel_delayed_work(&rport->dev_loss_work); + rport->failed_reconnects = 0; srp_rport_set_state(rport, SRP_RPORT_RUNNING); scsi_target_unblock(&shost->shost_gendev, SDEV_RUNNING); /* @@ -539,6 +622,7 @@ static void srp_rport_release(struct device *dev) { struct srp_rport *rport = dev_to_rport(dev); + cancel_delayed_work_sync(&rport->reconnect_work); cancel_delayed_work_sync(&rport->fast_io_fail_work); cancel_delayed_work_sync(&rport->dev_loss_work); @@ -635,6 +719,10 @@ struct srp_rport *srp_rport_add(struct Scsi_Host *shost, memcpy(rport->port_id, ids->port_id, sizeof(rport->port_id)); rport->roles = ids->roles; + if (i->f->reconnect) + rport->reconnect_delay = i->f->reconnect_delay ? + *i->f->reconnect_delay : 10; + INIT_DELAYED_WORK(&rport->reconnect_work, srp_reconnect_work); rport->fast_io_fail_tmo = i->f->fast_io_fail_tmo ? *i->f->fast_io_fail_tmo : 15; rport->dev_loss_tmo = i->f->dev_loss_tmo ? *i->f->dev_loss_tmo : 60; @@ -773,6 +861,10 @@ srp_attach_transport(struct srp_function_template *ft) i->rport_attrs[count++] = &dev_attr_fast_io_fail_tmo; i->rport_attrs[count++] = &dev_attr_dev_loss_tmo; } + if (ft->reconnect) { + i->rport_attrs[count++] = &dev_attr_reconnect_delay; + i->rport_attrs[count++] = &dev_attr_failed_reconnects; + } if (ft->rport_delete) i->rport_attrs[count++] = &dev_attr_delete; i->rport_attrs[count++] = NULL; diff --git a/include/scsi/scsi_transport_srp.h b/include/scsi/scsi_transport_srp.h index ee7001677f64..4ebf6913b7b2 100644 --- a/include/scsi/scsi_transport_srp.h +++ b/include/scsi/scsi_transport_srp.h @@ -31,7 +31,8 @@ enum srp_rport_state { /** * struct srp_rport * @lld_data: LLD private data. - * @mutex: Protects against concurrent rport fast_io_fail / dev_loss_tmo. + * @mutex: Protects against concurrent rport reconnect / fast_io_fail / + * dev_loss_tmo activity. */ struct srp_rport { /* for initiator and target drivers */ @@ -48,6 +49,9 @@ struct srp_rport { struct mutex mutex; enum srp_rport_state state; bool deleted; + int reconnect_delay; + int failed_reconnects; + struct delayed_work reconnect_work; int fast_io_fail_tmo; int dev_loss_tmo; struct delayed_work fast_io_fail_work; @@ -60,6 +64,7 @@ struct srp_rport { * dev_loss_tmo sysfs attribute for an rport. * @reset_timer_if_blocked: Whether or srp_timed_out() should reset the command * timer if the device on which it has been queued is blocked. + * @reconnect_delay: If not NULL, points to the default reconnect_delay value. * @fast_io_fail_tmo: If not NULL, points to the default fast_io_fail_tmo value. * @dev_loss_tmo: If not NULL, points to the default dev_loss_tmo value. * @reconnect: Callback function for reconnecting to the target. See also @@ -71,6 +76,7 @@ struct srp_function_template { /* for initiator drivers */ bool has_rport_state; bool reset_timer_if_blocked; + int *reconnect_delay; int *fast_io_fail_tmo; int *dev_loss_tmo; int (*reconnect)(struct srp_rport *rport); @@ -90,7 +96,8 @@ extern void srp_rport_put(struct srp_rport *rport); extern struct srp_rport *srp_rport_add(struct Scsi_Host *, struct srp_rport_identifiers *); extern void srp_rport_del(struct srp_rport *); -extern int srp_tmo_valid(int fast_io_fail_tmo, int dev_loss_tmo); +extern int srp_tmo_valid(int reconnect_delay, int fast_io_fail_tmo, + int dev_loss_tmo); extern int srp_reconnect_rport(struct srp_rport *rport); extern void srp_start_tl_fail_timers(struct srp_rport *rport); extern void srp_remove_host(struct Scsi_Host *); -- cgit v1.2.3