diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-12-02 13:37:02 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-12-02 13:37:02 -0800 |
commit | ef2cc88e2a205b8a11a19e78db63a70d3728cdf5 (patch) | |
tree | cf6a32c8b4875ef72cb90158f5220d3b150352b9 /drivers/scsi/lpfc | |
parent | 937d6eefc716a9071f0e3bada19200de1bb9d048 (diff) | |
parent | 65309ef6b258f5a7b57c1033a82ba2aba5c434cc (diff) | |
download | lwn-ef2cc88e2a205b8a11a19e78db63a70d3728cdf5.tar.gz lwn-ef2cc88e2a205b8a11a19e78db63a70d3728cdf5.zip |
Merge tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi
Pull SCSI updates from James Bottomley:
"This is mostly update of the usual drivers: aacraid, ufs, zfcp,
NCR5380, lpfc, qla2xxx, smartpqi, hisi_sas, target, mpt3sas, pm80xx
plus a whole load of minor updates and fixes.
The major core changes are Al Viro's reworking of sg's handling of
copy to/from user, Ming Lei's removal of the host busy counter to
avoid contention in the multiqueue case and Damien Le Moal's fixing of
residual tracking across error handling"
* tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi: (251 commits)
scsi: bnx2fc: timeout calculation invalid for bnx2fc_eh_abort()
scsi: target: core: Fix a pr_debug() argument
scsi: iscsi: Don't send data to unbound connection
scsi: target: iscsi: Wait for all commands to finish before freeing a session
scsi: target: core: Release SPC-2 reservations when closing a session
scsi: target: core: Document target_cmd_size_check()
scsi: bnx2i: fix potential use after free
Revert "scsi: qla2xxx: Fix memory leak when sending I/O fails"
scsi: NCR5380: Add disconnect_mask module parameter
scsi: NCR5380: Unconditionally clear ICR after do_abort()
scsi: NCR5380: Call scsi_set_resid() on command completion
scsi: scsi_debug: num_tgts must be >= 0
scsi: lpfc: use hdwq assigned cpu for allocation
scsi: arcmsr: fix indentation issues
scsi: qla4xxx: fix double free bug
scsi: pm80xx: Modified the logic to collect fatal dump
scsi: pm80xx: Tie the interrupt name to the module instance
scsi: pm80xx: Controller fatal error through sysfs
scsi: pm80xx: Do not request 12G sas speeds
scsi: pm80xx: Cleanup command when a reset times out
...
Diffstat (limited to 'drivers/scsi/lpfc')
-rw-r--r-- | drivers/scsi/lpfc/lpfc.h | 40 | ||||
-rw-r--r-- | drivers/scsi/lpfc/lpfc_attr.c | 298 | ||||
-rw-r--r-- | drivers/scsi/lpfc/lpfc_bsg.c | 18 | ||||
-rw-r--r-- | drivers/scsi/lpfc/lpfc_crtn.h | 7 | ||||
-rw-r--r-- | drivers/scsi/lpfc/lpfc_ct.c | 28 | ||||
-rw-r--r-- | drivers/scsi/lpfc/lpfc_debugfs.c | 118 | ||||
-rw-r--r-- | drivers/scsi/lpfc/lpfc_els.c | 57 | ||||
-rw-r--r-- | drivers/scsi/lpfc/lpfc_hbadisc.c | 200 | ||||
-rw-r--r-- | drivers/scsi/lpfc/lpfc_hw4.h | 31 | ||||
-rw-r--r-- | drivers/scsi/lpfc/lpfc_init.c | 954 | ||||
-rw-r--r-- | drivers/scsi/lpfc/lpfc_logmsg.h | 17 | ||||
-rw-r--r-- | drivers/scsi/lpfc/lpfc_mbox.c | 1 | ||||
-rw-r--r-- | drivers/scsi/lpfc/lpfc_mem.c | 3 | ||||
-rw-r--r-- | drivers/scsi/lpfc/lpfc_nportdisc.c | 149 | ||||
-rw-r--r-- | drivers/scsi/lpfc/lpfc_nvme.c | 85 | ||||
-rw-r--r-- | drivers/scsi/lpfc/lpfc_nvmet.c | 103 | ||||
-rw-r--r-- | drivers/scsi/lpfc/lpfc_nvmet.h | 2 | ||||
-rw-r--r-- | drivers/scsi/lpfc/lpfc_scsi.c | 43 | ||||
-rw-r--r-- | drivers/scsi/lpfc/lpfc_sli.c | 391 | ||||
-rw-r--r-- | drivers/scsi/lpfc/lpfc_sli.h | 3 | ||||
-rw-r--r-- | drivers/scsi/lpfc/lpfc_sli4.h | 42 | ||||
-rw-r--r-- | drivers/scsi/lpfc/lpfc_version.h | 2 |
22 files changed, 1998 insertions, 594 deletions
diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 691acbdcc46d..935f98804198 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -605,6 +605,12 @@ struct lpfc_epd_pool { spinlock_t lock; /* lock for expedite pool */ }; +enum ras_state { + INACTIVE, + REG_INPROGRESS, + ACTIVE +}; + struct lpfc_ras_fwlog { uint8_t *fwlog_buff; uint32_t fw_buffcount; /* Buffer size posted to FW */ @@ -621,7 +627,7 @@ struct lpfc_ras_fwlog { bool ras_enabled; /* Ras Enabled for the function */ #define LPFC_RAS_DISABLE_LOGGING 0x00 #define LPFC_RAS_ENABLE_LOGGING 0x01 - bool ras_active; /* RAS logging running state */ + enum ras_state state; /* RAS logging running state */ }; struct lpfc_hba { @@ -725,6 +731,7 @@ struct lpfc_hba { #define HBA_FCOE_MODE 0x4 /* HBA function in FCoE Mode */ #define HBA_SP_QUEUE_EVT 0x8 /* Slow-path qevt posted to worker thread*/ #define HBA_POST_RECEIVE_BUFFER 0x10 /* Rcv buffers need to be posted */ +#define HBA_PERSISTENT_TOPO 0x20 /* Persistent topology support in hba */ #define ELS_XRI_ABORT_EVENT 0x40 #define ASYNC_EVENT 0x80 #define LINK_DISABLED 0x100 /* Link disabled by user */ @@ -830,6 +837,7 @@ struct lpfc_hba { uint32_t cfg_fcp_mq_threshold; uint32_t cfg_hdw_queue; uint32_t cfg_irq_chann; + uint32_t cfg_irq_numa; uint32_t cfg_suppress_rsp; uint32_t cfg_nvme_oas; uint32_t cfg_nvme_embed_cmd; @@ -872,7 +880,6 @@ struct lpfc_hba { uint32_t cfg_aer_support; uint32_t cfg_sriov_nr_virtfn; uint32_t cfg_request_firmware_upgrade; - uint32_t cfg_iocb_cnt; uint32_t cfg_suppress_link_up; uint32_t cfg_rrq_xri_bitmap_sz; uint32_t cfg_delay_discovery; @@ -990,7 +997,6 @@ struct lpfc_hba { struct dma_pool *lpfc_drb_pool; /* data receive buffer pool */ struct dma_pool *lpfc_nvmet_drb_pool; /* data receive buffer pool */ struct dma_pool *lpfc_hbq_pool; /* SLI3 hbq buffer pool */ - struct dma_pool *txrdy_payload_pool; struct dma_pool *lpfc_cmd_rsp_buf_pool; struct lpfc_dma_pool lpfc_mbuf_safety_pool; @@ -1055,6 +1061,7 @@ struct lpfc_hba { #ifdef LPFC_HDWQ_LOCK_STAT struct dentry *debug_lockstat; #endif + struct dentry *debug_ras_log; atomic_t nvmeio_trc_cnt; uint32_t nvmeio_trc_size; uint32_t nvmeio_trc_output_idx; @@ -1209,6 +1216,13 @@ struct lpfc_hba { uint64_t ktime_seg10_min; uint64_t ktime_seg10_max; #endif + + struct hlist_node cpuhp; /* used for cpuhp per hba callback */ + struct timer_list cpuhp_poll_timer; + struct list_head poll_list; /* slowpath eq polling list */ +#define LPFC_POLL_HB 1 /* slowpath heartbeat */ +#define LPFC_POLL_FASTPATH 0 /* called from fastpath */ +#define LPFC_POLL_SLOWPATH 1 /* called from slowpath */ }; static inline struct Scsi_Host * @@ -1299,6 +1313,26 @@ lpfc_phba_elsring(struct lpfc_hba *phba) } /** + * lpfc_next_online_numa_cpu - Finds next online CPU on NUMA node + * @numa_mask: Pointer to phba's numa_mask member. + * @start: starting cpu index + * + * Note: If no valid cpu found, then nr_cpu_ids is returned. + * + **/ +static inline unsigned int +lpfc_next_online_numa_cpu(const struct cpumask *numa_mask, unsigned int start) +{ + unsigned int cpu_it; + + for_each_cpu_wrap(cpu_it, numa_mask, start) { + if (cpu_online(cpu_it)) + break; + } + + return cpu_it; +} +/** * lpfc_sli4_mod_hba_eq_delay - update EQ delay * @phba: Pointer to HBA context object. * @q: The Event Queue to update. diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 25aa7a53d255..4ff82b36a37a 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -176,7 +176,6 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr, int i; int len = 0; char tmp[LPFC_MAX_NVME_INFO_TMP_LEN] = {0}; - unsigned long iflags = 0; if (!(vport->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) { len = scnprintf(buf, PAGE_SIZE, "NVME Disabled\n"); @@ -347,7 +346,6 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr, if (strlcat(buf, "\nNVME Initiator Enabled\n", PAGE_SIZE) >= PAGE_SIZE) goto buffer_done; - rcu_read_lock(); scnprintf(tmp, sizeof(tmp), "XRI Dist lpfc%d Total %d IO %d ELS %d\n", phba->brd_no, @@ -355,7 +353,7 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr, phba->sli4_hba.io_xri_max, lpfc_sli4_get_els_iocb_cnt(phba)); if (strlcat(buf, tmp, PAGE_SIZE) >= PAGE_SIZE) - goto rcu_unlock_buf_done; + goto buffer_done; /* Port state is only one of two values for now. */ if (localport->port_id) @@ -371,15 +369,17 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr, wwn_to_u64(vport->fc_nodename.u.wwn), localport->port_id, statep); if (strlcat(buf, tmp, PAGE_SIZE) >= PAGE_SIZE) - goto rcu_unlock_buf_done; + goto buffer_done; + + spin_lock_irq(shost->host_lock); list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) { nrport = NULL; - spin_lock_irqsave(&vport->phba->hbalock, iflags); + spin_lock(&vport->phba->hbalock); rport = lpfc_ndlp_get_nrport(ndlp); if (rport) nrport = rport->remoteport; - spin_unlock_irqrestore(&vport->phba->hbalock, iflags); + spin_unlock(&vport->phba->hbalock); if (!nrport) continue; @@ -398,39 +398,39 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr, /* Tab in to show lport ownership. */ if (strlcat(buf, "NVME RPORT ", PAGE_SIZE) >= PAGE_SIZE) - goto rcu_unlock_buf_done; + goto unlock_buf_done; if (phba->brd_no >= 10) { if (strlcat(buf, " ", PAGE_SIZE) >= PAGE_SIZE) - goto rcu_unlock_buf_done; + goto unlock_buf_done; } scnprintf(tmp, sizeof(tmp), "WWPN x%llx ", nrport->port_name); if (strlcat(buf, tmp, PAGE_SIZE) >= PAGE_SIZE) - goto rcu_unlock_buf_done; + goto unlock_buf_done; scnprintf(tmp, sizeof(tmp), "WWNN x%llx ", nrport->node_name); if (strlcat(buf, tmp, PAGE_SIZE) >= PAGE_SIZE) - goto rcu_unlock_buf_done; + goto unlock_buf_done; scnprintf(tmp, sizeof(tmp), "DID x%06x ", nrport->port_id); if (strlcat(buf, tmp, PAGE_SIZE) >= PAGE_SIZE) - goto rcu_unlock_buf_done; + goto unlock_buf_done; /* An NVME rport can have multiple roles. */ if (nrport->port_role & FC_PORT_ROLE_NVME_INITIATOR) { if (strlcat(buf, "INITIATOR ", PAGE_SIZE) >= PAGE_SIZE) - goto rcu_unlock_buf_done; + goto unlock_buf_done; } if (nrport->port_role & FC_PORT_ROLE_NVME_TARGET) { if (strlcat(buf, "TARGET ", PAGE_SIZE) >= PAGE_SIZE) - goto rcu_unlock_buf_done; + goto unlock_buf_done; } if (nrport->port_role & FC_PORT_ROLE_NVME_DISCOVERY) { if (strlcat(buf, "DISCSRVC ", PAGE_SIZE) >= PAGE_SIZE) - goto rcu_unlock_buf_done; + goto unlock_buf_done; } if (nrport->port_role & ~(FC_PORT_ROLE_NVME_INITIATOR | FC_PORT_ROLE_NVME_TARGET | @@ -438,14 +438,14 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr, scnprintf(tmp, sizeof(tmp), "UNKNOWN ROLE x%x", nrport->port_role); if (strlcat(buf, tmp, PAGE_SIZE) >= PAGE_SIZE) - goto rcu_unlock_buf_done; + goto unlock_buf_done; } scnprintf(tmp, sizeof(tmp), "%s\n", statep); if (strlcat(buf, tmp, PAGE_SIZE) >= PAGE_SIZE) - goto rcu_unlock_buf_done; + goto unlock_buf_done; } - rcu_read_unlock(); + spin_unlock_irq(shost->host_lock); if (!lport) goto buffer_done; @@ -505,11 +505,11 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr, atomic_read(&lport->cmpl_fcp_err)); strlcat(buf, tmp, PAGE_SIZE); - /* RCU is already unlocked. */ + /* host_lock is already unlocked. */ goto buffer_done; - rcu_unlock_buf_done: - rcu_read_unlock(); + unlock_buf_done: + spin_unlock_irq(shost->host_lock); buffer_done: len = strnlen(buf, PAGE_SIZE); @@ -1475,8 +1475,9 @@ lpfc_sli4_pdev_status_reg_wait(struct lpfc_hba *phba) int i; msleep(100); - lpfc_readl(phba->sli4_hba.u.if_type2.STATUSregaddr, - &portstat_reg.word0); + if (lpfc_readl(phba->sli4_hba.u.if_type2.STATUSregaddr, + &portstat_reg.word0)) + return -EIO; /* verify if privileged for the request operation */ if (!bf_get(lpfc_sliport_status_rn, &portstat_reg) && @@ -1486,8 +1487,9 @@ lpfc_sli4_pdev_status_reg_wait(struct lpfc_hba *phba) /* wait for the SLI port firmware ready after firmware reset */ for (i = 0; i < LPFC_FW_RESET_MAXIMUM_WAIT_10MS_CNT; i++) { msleep(10); - lpfc_readl(phba->sli4_hba.u.if_type2.STATUSregaddr, - &portstat_reg.word0); + if (lpfc_readl(phba->sli4_hba.u.if_type2.STATUSregaddr, + &portstat_reg.word0)) + continue; if (!bf_get(lpfc_sliport_status_err, &portstat_reg)) continue; if (!bf_get(lpfc_sliport_status_rn, &portstat_reg)) @@ -1642,7 +1644,7 @@ lpfc_set_trunking(struct lpfc_hba *phba, char *buff_out) { LPFC_MBOXQ_t *mbox = NULL; unsigned long val = 0; - char *pval = 0; + char *pval = NULL; int rc = 0; if (!strncmp("enable", buff_out, @@ -3533,6 +3535,31 @@ LPFC_ATTR_R(enable_rrq, 2, 0, 2, LPFC_ATTR_R(suppress_link_up, LPFC_INITIALIZE_LINK, LPFC_INITIALIZE_LINK, LPFC_DELAY_INIT_LINK_INDEFINITELY, "Suppress Link Up at initialization"); + +static ssize_t +lpfc_pls_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct Scsi_Host *shost = class_to_shost(dev); + struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba; + + return scnprintf(buf, PAGE_SIZE, "%d\n", + phba->sli4_hba.pc_sli4_params.pls); +} +static DEVICE_ATTR(pls, 0444, + lpfc_pls_show, NULL); + +static ssize_t +lpfc_pt_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct Scsi_Host *shost = class_to_shost(dev); + struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba; + + return scnprintf(buf, PAGE_SIZE, "%d\n", + (phba->hba_flag & HBA_PERSISTENT_TOPO) ? 1 : 0); +} +static DEVICE_ATTR(pt, 0444, + lpfc_pt_show, NULL); + /* # lpfc_cnt: Number of IOCBs allocated for ELS, CT, and ABTS # 1 - (1024) @@ -3580,9 +3607,6 @@ lpfc_txcmplq_hw_show(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR(txcmplq_hw, S_IRUGO, lpfc_txcmplq_hw_show, NULL); -LPFC_ATTR_R(iocb_cnt, 2, 1, 5, - "Number of IOCBs alloc for ELS, CT, and ABTS: 1k to 5k IOCBs"); - /* # lpfc_nodev_tmo: If set, it will hold all I/O errors on devices that disappear # until the timer expires. Value range is [0,255]. Default value is 30. @@ -4096,7 +4120,16 @@ lpfc_topology_store(struct device *dev, struct device_attribute *attr, val); return -EINVAL; } - if ((phba->pcidev->device == PCI_DEVICE_ID_LANCER_G6_FC || + /* + * The 'topology' is not a configurable parameter if : + * - persistent topology enabled + * - G7 adapters + * - G6 with no private loop support + */ + + if (((phba->hba_flag & HBA_PERSISTENT_TOPO) || + (!phba->sli4_hba.pc_sli4_params.pls && + phba->pcidev->device == PCI_DEVICE_ID_LANCER_G6_FC) || phba->pcidev->device == PCI_DEVICE_ID_LANCER_G7_FC) && val == 4) { lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT, @@ -5298,7 +5331,7 @@ lpfc_fcp_cpu_map_show(struct device *dev, struct device_attribute *attr, len += scnprintf(buf + len, PAGE_SIZE - len, "CPU %02d not present\n", phba->sli4_hba.curr_disp_cpu); - else if (cpup->irq == LPFC_VECTOR_MAP_EMPTY) { + else if (cpup->eq == LPFC_VECTOR_MAP_EMPTY) { if (cpup->hdwq == LPFC_VECTOR_MAP_EMPTY) len += scnprintf( buf + len, PAGE_SIZE - len, @@ -5311,10 +5344,10 @@ lpfc_fcp_cpu_map_show(struct device *dev, struct device_attribute *attr, else len += scnprintf( buf + len, PAGE_SIZE - len, - "CPU %02d EQ %04d hdwq %04d " + "CPU %02d EQ None hdwq %04d " "physid %d coreid %d ht %d ua %d\n", phba->sli4_hba.curr_disp_cpu, - cpup->eq, cpup->hdwq, cpup->phys_id, + cpup->hdwq, cpup->phys_id, cpup->core_id, (cpup->flag & LPFC_CPU_MAP_HYPER), (cpup->flag & LPFC_CPU_MAP_UNASSIGN)); @@ -5329,7 +5362,7 @@ lpfc_fcp_cpu_map_show(struct device *dev, struct device_attribute *attr, cpup->core_id, (cpup->flag & LPFC_CPU_MAP_HYPER), (cpup->flag & LPFC_CPU_MAP_UNASSIGN), - cpup->irq); + lpfc_get_irq(cpup->eq)); else len += scnprintf( buf + len, PAGE_SIZE - len, @@ -5340,7 +5373,7 @@ lpfc_fcp_cpu_map_show(struct device *dev, struct device_attribute *attr, cpup->core_id, (cpup->flag & LPFC_CPU_MAP_HYPER), (cpup->flag & LPFC_CPU_MAP_UNASSIGN), - cpup->irq); + lpfc_get_irq(cpup->eq)); } phba->sli4_hba.curr_disp_cpu++; @@ -5711,7 +5744,7 @@ LPFC_ATTR_RW(nvme_embed_cmd, 1, 0, 2, * the driver will advertise it supports to the SCSI layer. * * 0 = Set nr_hw_queues by the number of CPUs or HW queues. - * 1,128 = Manually specify the maximum nr_hw_queue value to be set, + * 1,256 = Manually specify nr_hw_queue value to be advertised, * * Value range is [0,256]. Default value is 8. */ @@ -5729,30 +5762,130 @@ LPFC_ATTR_R(fcp_mq_threshold, LPFC_FCP_MQ_THRESHOLD_DEF, * A hardware IO queue maps (qidx) to a specific driver CQ/WQ. * * 0 = Configure the number of hdw queues to the number of active CPUs. - * 1,128 = Manually specify how many hdw queues to use. + * 1,256 = Manually specify how many hdw queues to use. * - * Value range is [0,128]. Default value is 0. + * Value range is [0,256]. Default value is 0. */ LPFC_ATTR_R(hdw_queue, LPFC_HBA_HDWQ_DEF, LPFC_HBA_HDWQ_MIN, LPFC_HBA_HDWQ_MAX, "Set the number of I/O Hardware Queues"); +static inline void +lpfc_assign_default_irq_numa(struct lpfc_hba *phba) +{ +#if IS_ENABLED(CONFIG_X86) + /* If AMD architecture, then default is LPFC_IRQ_CHANN_NUMA */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + phba->cfg_irq_numa = 1; + else + phba->cfg_irq_numa = 0; +#else + phba->cfg_irq_numa = 0; +#endif +} + /* * lpfc_irq_chann: Set the number of IRQ vectors that are available * for Hardware Queues to utilize. This also will map to the number * of EQ / MSI-X vectors the driver will create. This should never be * more than the number of Hardware Queues * - * 0 = Configure number of IRQ Channels to the number of active CPUs. - * 1,128 = Manually specify how many IRQ Channels to use. + * 0 = Configure number of IRQ Channels to: + * if AMD architecture, number of CPUs on HBA's NUMA node + * otherwise, number of active CPUs. + * [1,256] = Manually specify how many IRQ Channels to use. * - * Value range is [0,128]. Default value is 0. + * Value range is [0,256]. Default value is [0]. */ -LPFC_ATTR_R(irq_chann, - LPFC_HBA_HDWQ_DEF, - LPFC_HBA_HDWQ_MIN, LPFC_HBA_HDWQ_MAX, - "Set the number of I/O IRQ Channels"); +static uint lpfc_irq_chann = LPFC_IRQ_CHANN_DEF; +module_param(lpfc_irq_chann, uint, 0444); +MODULE_PARM_DESC(lpfc_irq_chann, "Set number of interrupt vectors to allocate"); + +/* lpfc_irq_chann_init - Set the hba irq_chann initial value + * @phba: lpfc_hba pointer. + * @val: contains the initial value + * + * Description: + * Validates the initial value is within range and assigns it to the + * adapter. If not in range, an error message is posted and the + * default value is assigned. + * + * Returns: + * zero if value is in range and is set + * -EINVAL if value was out of range + **/ +static int +lpfc_irq_chann_init(struct lpfc_hba *phba, uint32_t val) +{ + const struct cpumask *numa_mask; + + if (phba->cfg_use_msi != 2) { + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "8532 use_msi = %u ignoring cfg_irq_numa\n", + phba->cfg_use_msi); + phba->cfg_irq_numa = 0; + phba->cfg_irq_chann = LPFC_IRQ_CHANN_MIN; + return 0; + } + + /* Check if default setting was passed */ + if (val == LPFC_IRQ_CHANN_DEF) + lpfc_assign_default_irq_numa(phba); + + if (phba->cfg_irq_numa) { + numa_mask = &phba->sli4_hba.numa_mask; + + if (cpumask_empty(numa_mask)) { + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "8533 Could not identify NUMA node, " + "ignoring cfg_irq_numa\n"); + phba->cfg_irq_numa = 0; + phba->cfg_irq_chann = LPFC_IRQ_CHANN_MIN; + } else { + phba->cfg_irq_chann = cpumask_weight(numa_mask); + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "8543 lpfc_irq_chann set to %u " + "(numa)\n", phba->cfg_irq_chann); + } + } else { + if (val > LPFC_IRQ_CHANN_MAX) { + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "8545 lpfc_irq_chann attribute cannot " + "be set to %u, allowed range is " + "[%u,%u]\n", + val, + LPFC_IRQ_CHANN_MIN, + LPFC_IRQ_CHANN_MAX); + phba->cfg_irq_chann = LPFC_IRQ_CHANN_MIN; + return -EINVAL; + } + phba->cfg_irq_chann = val; + } + + return 0; +} + +/** + * lpfc_irq_chann_show - Display value of irq_chann + * @dev: class converted to a Scsi_host structure. + * @attr: device attribute, not used. + * @buf: on return contains a string with the list sizes + * + * Returns: size of formatted string. + **/ +static ssize_t +lpfc_irq_chann_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct Scsi_Host *shost = class_to_shost(dev); + struct lpfc_vport *vport = (struct lpfc_vport *)shost->hostdata; + struct lpfc_hba *phba = vport->phba; + + return scnprintf(buf, PAGE_SIZE, "%u\n", phba->cfg_irq_chann); +} + +static DEVICE_ATTR_RO(lpfc_irq_chann); /* # lpfc_enable_hba_reset: Allow or prevent HBA resets to the hardware. @@ -5933,7 +6066,53 @@ LPFC_ATTR_RW(enable_mds_diags, 0, 0, 1, "Enable MDS Diagnostics"); * [1-4] = Multiple of 1/4th Mb of host memory for FW logging * Value range [0..4]. Default value is 0 */ -LPFC_ATTR_RW(ras_fwlog_buffsize, 0, 0, 4, "Host memory for FW logging"); +LPFC_ATTR(ras_fwlog_buffsize, 0, 0, 4, "Host memory for FW logging"); +lpfc_param_show(ras_fwlog_buffsize); + +static ssize_t +lpfc_ras_fwlog_buffsize_set(struct lpfc_hba *phba, uint val) +{ + int ret = 0; + enum ras_state state; + + if (!lpfc_rangecheck(val, 0, 4)) + return -EINVAL; + + if (phba->cfg_ras_fwlog_buffsize == val) + return 0; + + if (phba->cfg_ras_fwlog_func != PCI_FUNC(phba->pcidev->devfn)) + return -EINVAL; + + spin_lock_irq(&phba->hbalock); + state = phba->ras_fwlog.state; + spin_unlock_irq(&phba->hbalock); + + if (state == REG_INPROGRESS) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, "6147 RAS Logging " + "registration is in progress\n"); + return -EBUSY; + } + + /* For disable logging: stop the logs and free the DMA. + * For ras_fwlog_buffsize size change we still need to free and + * reallocate the DMA in lpfc_sli4_ras_fwlog_init. + */ + phba->cfg_ras_fwlog_buffsize = val; + if (state == ACTIVE) { + lpfc_ras_stop_fwlog(phba); + lpfc_sli4_ras_dma_free(phba); + } + + lpfc_sli4_ras_init(phba); + if (phba->ras_fwlog.ras_enabled) + ret = lpfc_sli4_ras_fwlog_init(phba, phba->cfg_ras_fwlog_level, + LPFC_RAS_ENABLE_LOGGING); + return ret; +} + +lpfc_param_store(ras_fwlog_buffsize); +static DEVICE_ATTR_RW(lpfc_ras_fwlog_buffsize); /* * lpfc_ras_fwlog_level: Firmware logging verbosity level @@ -6071,8 +6250,9 @@ struct device_attribute *lpfc_hba_attrs[] = { &dev_attr_lpfc_sriov_nr_virtfn, &dev_attr_lpfc_req_fw_upgrade, &dev_attr_lpfc_suppress_link_up, - &dev_attr_lpfc_iocb_cnt, &dev_attr_iocb_hw, + &dev_attr_pls, + &dev_attr_pt, &dev_attr_txq_hw, &dev_attr_txcmplq_hw, &dev_attr_lpfc_fips_level, @@ -7085,11 +7265,22 @@ struct fc_function_template lpfc_vport_transport_functions = { static void lpfc_get_hba_function_mode(struct lpfc_hba *phba) { - /* If it's a SkyHawk FCoE adapter */ - if (phba->pcidev->device == PCI_DEVICE_ID_SKYHAWK) + /* If the adapter supports FCoE mode */ + switch (phba->pcidev->device) { + case PCI_DEVICE_ID_SKYHAWK: + case PCI_DEVICE_ID_SKYHAWK_VF: + case PCI_DEVICE_ID_LANCER_FCOE: + case PCI_DEVICE_ID_LANCER_FCOE_VF: + case PCI_DEVICE_ID_ZEPHYR_DCSP: + case PCI_DEVICE_ID_HORNET: + case PCI_DEVICE_ID_TIGERSHARK: + case PCI_DEVICE_ID_TOMCAT: phba->hba_flag |= HBA_FCOE_MODE; - else + break; + default: + /* for others, clear the flag */ phba->hba_flag &= ~HBA_FCOE_MODE; + } } /** @@ -7099,6 +7290,7 @@ lpfc_get_hba_function_mode(struct lpfc_hba *phba) void lpfc_get_cfgparam(struct lpfc_hba *phba) { + lpfc_hba_log_verbose_init(phba, lpfc_log_verbose); lpfc_fcp_io_sched_init(phba, lpfc_fcp_io_sched); lpfc_ns_query_init(phba, lpfc_ns_query); lpfc_fcp2_no_tgt_reset_init(phba, lpfc_fcp2_no_tgt_reset); @@ -7205,12 +7397,10 @@ lpfc_get_cfgparam(struct lpfc_hba *phba) phba->cfg_soft_wwpn = 0L; lpfc_sg_seg_cnt_init(phba, lpfc_sg_seg_cnt); lpfc_hba_queue_depth_init(phba, lpfc_hba_queue_depth); - lpfc_hba_log_verbose_init(phba, lpfc_log_verbose); lpfc_aer_support_init(phba, lpfc_aer_support); lpfc_sriov_nr_virtfn_init(phba, lpfc_sriov_nr_virtfn); lpfc_request_firmware_upgrade_init(phba, lpfc_req_fw_upgrade); lpfc_suppress_link_up_init(phba, lpfc_suppress_link_up); - lpfc_iocb_cnt_init(phba, lpfc_iocb_cnt); lpfc_delay_discovery_init(phba, lpfc_delay_discovery); lpfc_sli_mode_init(phba, lpfc_sli_mode); phba->cfg_enable_dss = 1; @@ -7256,11 +7446,11 @@ lpfc_nvme_mod_param_dep(struct lpfc_hba *phba) } if (!phba->cfg_nvmet_mrq) - phba->cfg_nvmet_mrq = phba->cfg_irq_chann; + phba->cfg_nvmet_mrq = phba->cfg_hdw_queue; /* Adjust lpfc_nvmet_mrq to avoid running out of WQE slots */ - if (phba->cfg_nvmet_mrq > phba->cfg_irq_chann) { - phba->cfg_nvmet_mrq = phba->cfg_irq_chann; + if (phba->cfg_nvmet_mrq > phba->cfg_hdw_queue) { + phba->cfg_nvmet_mrq = phba->cfg_hdw_queue; lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC, "6018 Adjust lpfc_nvmet_mrq to %d\n", phba->cfg_nvmet_mrq); diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c index 39a736b887b1..d4e1b120cc9e 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.c +++ b/drivers/scsi/lpfc/lpfc_bsg.c @@ -5435,10 +5435,12 @@ lpfc_bsg_get_ras_config(struct bsg_job *job) bsg_reply->reply_data.vendor_reply.vendor_rsp; /* Current logging state */ - if (ras_fwlog->ras_active == true) + spin_lock_irq(&phba->hbalock); + if (ras_fwlog->state == ACTIVE) ras_reply->state = LPFC_RASLOG_STATE_RUNNING; else ras_reply->state = LPFC_RASLOG_STATE_STOPPED; + spin_unlock_irq(&phba->hbalock); ras_reply->log_level = phba->ras_fwlog.fw_loglevel; ras_reply->log_buff_sz = phba->cfg_ras_fwlog_buffsize; @@ -5495,10 +5497,13 @@ lpfc_bsg_set_ras_config(struct bsg_job *job) if (action == LPFC_RASACTION_STOP_LOGGING) { /* Check if already disabled */ - if (ras_fwlog->ras_active == false) { + spin_lock_irq(&phba->hbalock); + if (ras_fwlog->state != ACTIVE) { + spin_unlock_irq(&phba->hbalock); rc = -ESRCH; goto ras_job_error; } + spin_unlock_irq(&phba->hbalock); /* Disable logging */ lpfc_ras_stop_fwlog(phba); @@ -5509,8 +5514,10 @@ lpfc_bsg_set_ras_config(struct bsg_job *job) * FW-logging with new log-level. Return status * "Logging already Running" to caller. **/ - if (ras_fwlog->ras_active) + spin_lock_irq(&phba->hbalock); + if (ras_fwlog->state != INACTIVE) action_status = -EINPROGRESS; + spin_unlock_irq(&phba->hbalock); /* Enable logging */ rc = lpfc_sli4_ras_fwlog_init(phba, log_level, @@ -5626,10 +5633,13 @@ lpfc_bsg_get_ras_fwlog(struct bsg_job *job) goto ras_job_error; /* Logging to be stopped before reading */ - if (ras_fwlog->ras_active == true) { + spin_lock_irq(&phba->hbalock); + if (ras_fwlog->state == ACTIVE) { + spin_unlock_irq(&phba->hbalock); rc = -EINPROGRESS; goto ras_job_error; } + spin_unlock_irq(&phba->hbalock); if (job->request_len < sizeof(struct fc_bsg_request) + diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index b2ad8c750486..ee353c84a097 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -215,6 +215,12 @@ irqreturn_t lpfc_sli_fp_intr_handler(int, void *); irqreturn_t lpfc_sli4_intr_handler(int, void *); irqreturn_t lpfc_sli4_hba_intr_handler(int, void *); +void lpfc_sli4_cleanup_poll_list(struct lpfc_hba *phba); +int lpfc_sli4_poll_eq(struct lpfc_queue *q, uint8_t path); +void lpfc_sli4_poll_hbtimer(struct timer_list *t); +void lpfc_sli4_start_polling(struct lpfc_queue *q); +void lpfc_sli4_stop_polling(struct lpfc_queue *q); + void lpfc_read_rev(struct lpfc_hba *, LPFC_MBOXQ_t *); void lpfc_sli4_swap_str(struct lpfc_hba *, LPFC_MBOXQ_t *); void lpfc_config_ring(struct lpfc_hba *, int, LPFC_MBOXQ_t *); @@ -586,6 +592,7 @@ void lpfc_release_io_buf(struct lpfc_hba *phba, struct lpfc_io_buf *ncmd, void lpfc_nvme_cmd_template(void); void lpfc_nvmet_cmd_template(void); void lpfc_nvme_cancel_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn); +void lpfc_nvme_prep_abort_wqe(struct lpfc_iocbq *pwqeq, u16 xritag, u8 opt); extern int lpfc_enable_nvmet_cnt; extern unsigned long long lpfc_enable_nvmet[]; extern int lpfc_no_hba_reset_cnt; diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index 25e86706e207..99c9bb249758 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -763,9 +763,11 @@ lpfc_cmpl_ct_cmd_gid_ft(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, cpu_to_be16(SLI_CT_RESPONSE_FS_ACC)) { lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, "0208 NameServer Rsp Data: x%x x%x " - "sz x%x\n", + "x%x x%x sz x%x\n", vport->fc_flag, CTreq->un.gid.Fc4Type, + vport->num_disc_nodes, + vport->gidft_inp, irsp->un.genreq64.bdl.bdeSize); lpfc_ns_rsp(vport, @@ -961,9 +963,13 @@ lpfc_cmpl_ct_cmd_gid_pt(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, if (CTrsp->CommandResponse.bits.CmdRsp == cpu_to_be16(SLI_CT_RESPONSE_FS_ACC)) { lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, - "4105 NameServer Rsp Data: x%x x%x\n", + "4105 NameServer Rsp Data: x%x x%x " + "x%x x%x sz x%x\n", vport->fc_flag, - CTreq->un.gid.Fc4Type); + CTreq->un.gid.Fc4Type, + vport->num_disc_nodes, + vport->gidft_inp, + irsp->un.genreq64.bdl.bdeSize); lpfc_ns_rsp(vport, outp, @@ -1025,6 +1031,11 @@ lpfc_cmpl_ct_cmd_gid_pt(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, } vport->gidft_inp--; } + + lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, + "6450 GID_PT cmpl inp %d disc %d\n", + vport->gidft_inp, vport->num_disc_nodes); + /* Link up / RSCN discovery */ if ((vport->num_disc_nodes == 0) && (vport->gidft_inp == 0)) { @@ -1159,6 +1170,11 @@ out: /* Link up / RSCN discovery */ if (vport->num_disc_nodes) vport->num_disc_nodes--; + + lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, + "6451 GFF_ID cmpl inp %d disc %d\n", + vport->gidft_inp, vport->num_disc_nodes); + if (vport->num_disc_nodes == 0) { /* * The driver has cycled through all Nports in the RSCN payload. @@ -1868,6 +1884,12 @@ lpfc_cmpl_ct_disc_fdmi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, if (irsp->ulpStatus == IOSTAT_LOCAL_REJECT) { switch ((irsp->un.ulpWord[4] & IOERR_PARAM_MASK)) { case IOERR_SLI_ABORTED: + case IOERR_SLI_DOWN: + /* Driver aborted this IO. No retry as error + * is likely Offline->Online or some adapter + * error. Recovery will try again. + */ + break; case IOERR_ABORT_IN_PROGRESS: case IOERR_SEQUENCE_TIMEOUT: case IOERR_ILLEGAL_FRAME: diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index 8d34be60d379..2e6a68d9ea4f 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -31,6 +31,7 @@ #include <linux/pci.h> #include <linux/spinlock.h> #include <linux/ctype.h> +#include <linux/vmalloc.h> #include <scsi/scsi.h> #include <scsi/scsi_device.h> @@ -2078,6 +2079,96 @@ lpfc_debugfs_lockstat_write(struct file *file, const char __user *buf, } #endif +static int lpfc_debugfs_ras_log_data(struct lpfc_hba *phba, + char *buffer, int size) +{ + int copied = 0; + struct lpfc_dmabuf *dmabuf, *next; + + spin_lock_irq(&phba->hbalock); + if (phba->ras_fwlog.state != ACTIVE) { + spin_unlock_irq(&phba->hbalock); + return -EINVAL; + } + spin_unlock_irq(&phba->hbalock); + + list_for_each_entry_safe(dmabuf, next, + &phba->ras_fwlog.fwlog_buff_list, list) { + memcpy(buffer + copied, dmabuf->virt, LPFC_RAS_MAX_ENTRY_SIZE); + copied += LPFC_RAS_MAX_ENTRY_SIZE; + if (size > copied) + break; + } + return copied; +} + +static int +lpfc_debugfs_ras_log_release(struct inode *inode, struct file *file) +{ + struct lpfc_debug *debug = file->private_data; + + vfree(debug->buffer); + kfree(debug); + + return 0; +} + +/** + * lpfc_debugfs_ras_log_open - Open the RAS log debugfs buffer + * @inode: The inode pointer that contains a vport pointer. + * @file: The file pointer to attach the log output. + * + * Description: + * This routine is the entry point for the debugfs open file operation. It gets + * the vport from the i_private field in @inode, allocates the necessary buffer + * for the log, fills the buffer from the in-memory log for this vport, and then + * returns a pointer to that log in the private_data field in @file. + * + * Returns: + * This function returns zero if successful. On error it will return a negative + * error value. + **/ +static int +lpfc_debugfs_ras_log_open(struct inode *inode, struct file *file) +{ + struct lpfc_hba *phba = inode->i_private; + struct lpfc_debug *debug; + int size; + int rc = -ENOMEM; + + spin_lock_irq(&phba->hbalock); + if (phba->ras_fwlog.state != ACTIVE) { + spin_unlock_irq(&phba->hbalock); + rc = -EINVAL; + goto out; + } + spin_unlock_irq(&phba->hbalock); + debug = kmalloc(sizeof(*debug), GFP_KERNEL); + if (!debug) + goto out; + + size = LPFC_RAS_MIN_BUFF_POST_SIZE * phba->cfg_ras_fwlog_buffsize; + debug->buffer = vmalloc(size); + if (!debug->buffer) + goto free_debug; + + debug->len = lpfc_debugfs_ras_log_data(phba, debug->buffer, size); + if (debug->len < 0) { + rc = -EINVAL; + goto free_buffer; + } + file->private_data = debug; + + return 0; + +free_buffer: + vfree(debug->buffer); +free_debug: + kfree(debug); +out: + return rc; +} + /** * lpfc_debugfs_dumpHBASlim_open - Open the Dump HBA SLIM debugfs buffer * @inode: The inode pointer that contains a vport pointer. @@ -5286,6 +5377,16 @@ static const struct file_operations lpfc_debugfs_op_lockstat = { }; #endif +#undef lpfc_debugfs_ras_log +static const struct file_operations lpfc_debugfs_ras_log = { + .owner = THIS_MODULE, + .open = lpfc_debugfs_ras_log_open, + .llseek = lpfc_debugfs_lseek, + .read = lpfc_debugfs_read, + .release = lpfc_debugfs_ras_log_release, +}; +#endif + #undef lpfc_debugfs_op_dumpHBASlim static const struct file_operations lpfc_debugfs_op_dumpHBASlim = { .owner = THIS_MODULE, @@ -5457,7 +5558,6 @@ static const struct file_operations lpfc_idiag_op_extAcc = { .release = lpfc_idiag_cmd_release, }; -#endif /* lpfc_idiag_mbxacc_dump_bsg_mbox - idiag debugfs dump bsg mailbox command * @phba: Pointer to HBA context object. @@ -5707,6 +5807,19 @@ lpfc_debugfs_initialize(struct lpfc_vport *vport) goto debug_failed; } + /* RAS log */ + snprintf(name, sizeof(name), "ras_log"); + phba->debug_ras_log = + debugfs_create_file(name, 0644, + phba->hba_debugfs_root, + phba, &lpfc_debugfs_ras_log); + if (!phba->debug_ras_log) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT, + "6148 Cannot create debugfs" + " ras_log\n"); + goto debug_failed; + } + /* Setup hbqinfo */ snprintf(name, sizeof(name), "hbqinfo"); phba->debug_hbqinfo = @@ -6117,6 +6230,9 @@ lpfc_debugfs_terminate(struct lpfc_vport *vport) debugfs_remove(phba->debug_hbqinfo); /* hbqinfo */ phba->debug_hbqinfo = NULL; + debugfs_remove(phba->debug_ras_log); + phba->debug_ras_log = NULL; + #ifdef LPFC_HDWQ_LOCK_STAT debugfs_remove(phba->debug_lockstat); /* lockstat */ phba->debug_lockstat = NULL; diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index d5303994bfd6..42a2bf38eaea 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -2236,6 +2236,7 @@ lpfc_cmpl_els_prli(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, struct Scsi_Host *shost = lpfc_shost_from_vport(vport); IOCB_t *irsp; struct lpfc_nodelist *ndlp; + char *mode; /* we pass cmdiocb to state machine which needs rspiocb as well */ cmdiocb->context_un.rsp_iocb = rspiocb; @@ -2273,8 +2274,17 @@ lpfc_cmpl_els_prli(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, goto out; } + /* If we don't send GFT_ID to Fabric, a PRLI error + * could be expected. + */ + if ((vport->fc_flag & FC_FABRIC) || + (vport->cfg_enable_fc4_type != LPFC_ENABLE_BOTH)) + mode = KERN_ERR; + else + mode = KERN_INFO; + /* PRLI failed */ - lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS, + lpfc_printf_vlog(vport, mode, LOG_ELS, "2754 PRLI failure DID:%06X Status:x%x/x%x, " "data: x%x\n", ndlp->nlp_DID, irsp->ulpStatus, @@ -4291,6 +4301,11 @@ lpfc_cmpl_els_rsp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, irsp = &rspiocb->iocb; + if (!vport) { + lpfc_printf_log(phba, KERN_ERR, LOG_ELS, + "3177 ELS response failed\n"); + goto out; + } if (cmdiocb->context_un.mbox) mbox = cmdiocb->context_un.mbox; @@ -4430,7 +4445,7 @@ lpfc_cmpl_els_rsp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, mempool_free(mbox, phba->mbox_mem_pool); } out: - if (ndlp && NLP_CHK_NODE_ACT(ndlp)) { + if (ndlp && NLP_CHK_NODE_ACT(ndlp) && shost) { spin_lock_irq(shost->host_lock); ndlp->nlp_flag &= ~(NLP_ACC_REGLOGIN | NLP_RM_DFLT_RPI); spin_unlock_irq(shost->host_lock); @@ -5260,6 +5275,11 @@ lpfc_els_disc_plogi(struct lpfc_vport *vport) } } } + + lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, + "6452 Discover PLOGI %d flag x%x\n", + sentplogi, vport->fc_flag); + if (sentplogi) { lpfc_set_disctmo(vport); } @@ -6455,7 +6475,7 @@ lpfc_els_rcv_rscn(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, uint32_t payload_len, length, nportid, *cmd; int rscn_cnt; int rscn_id = 0, hba_id = 0; - int i; + int i, tmo; pcmd = (struct lpfc_dmabuf *) cmdiocb->context2; lp = (uint32_t *) pcmd->virt; @@ -6561,6 +6581,13 @@ lpfc_els_rcv_rscn(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, spin_lock_irq(shost->host_lock); vport->fc_flag |= FC_RSCN_DEFERRED; + + /* Restart disctmo if its already running */ + if (vport->fc_flag & FC_DISC_TMO) { + tmo = ((phba->fc_ratov * 3) + 3); + mod_timer(&vport->fc_disctmo, + jiffies + msecs_to_jiffies(1000 * tmo)); + } if ((rscn_cnt < FC_MAX_HOLD_RSCN) && !(vport->fc_flag & FC_RSCN_DISCOVERY)) { vport->fc_flag |= FC_RSCN_MODE; @@ -6663,9 +6690,10 @@ lpfc_els_handle_rscn(struct lpfc_vport *vport) /* RSCN processed */ lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, - "0215 RSCN processed Data: x%x x%x x%x x%x\n", + "0215 RSCN processed Data: x%x x%x x%x x%x x%x x%x\n", vport->fc_flag, 0, vport->fc_rscn_id_cnt, - vport->port_state); + vport->port_state, vport->num_disc_nodes, + vport->gidft_inp); /* To process RSCN, first compare RSCN data with NameServer */ vport->fc_ns_retry = 0; @@ -7986,20 +8014,22 @@ lpfc_els_flush_cmd(struct lpfc_vport *vport) struct lpfc_sli_ring *pring; struct lpfc_iocbq *tmp_iocb, *piocb; IOCB_t *cmd = NULL; + unsigned long iflags = 0; lpfc_fabric_abort_vport(vport); + /* * For SLI3, only the hbalock is required. But SLI4 needs to coordinate * with the ring insert operation. Because lpfc_sli_issue_abort_iotag * ultimately grabs the ring_lock, the driver must splice the list into * a working list and release the locks before calling the abort. */ - spin_lock_irq(&phba->hbalock); + spin_lock_irqsave(&phba->hbalock, iflags); pring = lpfc_phba_elsring(phba); /* Bail out if we've no ELS wq, like in PCI error recovery case. */ if (unlikely(!pring)) { - spin_unlock_irq(&phba->hbalock); + spin_unlock_irqrestore(&phba->hbalock, iflags); return; } @@ -8014,6 +8044,9 @@ lpfc_els_flush_cmd(struct lpfc_vport *vport) if (piocb->vport != vport) continue; + if (piocb->iocb_flag & LPFC_DRIVER_ABORTED) + continue; + /* On the ELS ring we can have ELS_REQUESTs or * GEN_REQUESTs waiting for a response. */ @@ -8037,21 +8070,21 @@ lpfc_els_flush_cmd(struct lpfc_vport *vport) if (phba->sli_rev == LPFC_SLI_REV4) spin_unlock(&pring->ring_lock); - spin_unlock_irq(&phba->hbalock); + spin_unlock_irqrestore(&phba->hbalock, iflags); /* Abort each txcmpl iocb on aborted list and remove the dlist links. */ list_for_each_entry_safe(piocb, tmp_iocb, &abort_list, dlist) { - spin_lock_irq(&phba->hbalock); + spin_lock_irqsave(&phba->hbalock, iflags); list_del_init(&piocb->dlist); lpfc_sli_issue_abort_iotag(phba, pring, piocb); - spin_unlock_irq(&phba->hbalock); + spin_unlock_irqrestore(&phba->hbalock, iflags); } if (!list_empty(&abort_list)) lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS, "3387 abort list for txq not empty\n"); INIT_LIST_HEAD(&abort_list); - spin_lock_irq(&phba->hbalock); + spin_lock_irqsave(&phba->hbalock, iflags); if (phba->sli_rev == LPFC_SLI_REV4) spin_lock(&pring->ring_lock); @@ -8091,7 +8124,7 @@ lpfc_els_flush_cmd(struct lpfc_vport *vport) if (phba->sli_rev == LPFC_SLI_REV4) spin_unlock(&pring->ring_lock); - spin_unlock_irq(&phba->hbalock); + spin_unlock_irqrestore(&phba->hbalock, iflags); /* Cancel all the IOCBs from the completions list */ lpfc_sli_cancel_iocbs(phba, &abort_list, diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 749286acdc17..85ada3deb47d 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -700,7 +700,10 @@ lpfc_work_done(struct lpfc_hba *phba) if (!(phba->hba_flag & HBA_SP_QUEUE_EVT)) set_bit(LPFC_DATA_READY, &phba->data_flags); } else { - if (phba->link_state >= LPFC_LINK_UP || + /* Driver could have abort request completed in queue + * when link goes down. Allow for this transition. + */ + if (phba->link_state >= LPFC_LINK_DOWN || phba->link_flag & LS_MDS_LOOPBACK) { pring->flag &= ~LPFC_DEFERRED_RING_EVENT; lpfc_sli_handle_slow_ring_event(phba, pring, @@ -1135,7 +1138,6 @@ void lpfc_mbx_cmpl_local_config_link(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) { struct lpfc_vport *vport = pmb->vport; - uint8_t bbscn = 0; if (pmb->u.mb.mbxStatus) goto out; @@ -1162,17 +1164,11 @@ lpfc_mbx_cmpl_local_config_link(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) /* Start discovery by sending a FLOGI. port_state is identically * LPFC_FLOGI while waiting for FLOGI cmpl */ - if (vport->port_state != LPFC_FLOGI) { - if (phba->bbcredit_support && phba->cfg_enable_bbcr) { - bbscn = bf_get(lpfc_bbscn_def, - &phba->sli4_hba.bbscn_params); - vport->fc_sparam.cmn.bbRcvSizeMsb &= 0xf; - vport->fc_sparam.cmn.bbRcvSizeMsb |= (bbscn << 4); - } + if (vport->port_state != LPFC_FLOGI) lpfc_initial_flogi(vport); - } else if (vport->fc_flag & FC_PT2PT) { + else if (vport->fc_flag & FC_PT2PT) lpfc_disc_start(vport); - } + return; out: @@ -3456,8 +3452,8 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) phba->pport->port_state, vport->fc_flag); else if (attn_type == LPFC_ATT_UNEXP_WWPN) lpfc_printf_log(phba, KERN_ERR, LOG_LINK_EVENT, - "1313 Link Down UNEXP WWPN Event x%x received " - "Data: x%x x%x x%x x%x x%x\n", + "1313 Link Down Unexpected FA WWPN Event x%x " + "received Data: x%x x%x x%x x%x x%x\n", la->eventTag, phba->fc_eventTag, phba->pport->port_state, vport->fc_flag, bf_get(lpfc_mbx_read_top_mm, la), @@ -4046,7 +4042,7 @@ out: ndlp->nlp_flag |= NLP_RPI_REGISTERED; ndlp->nlp_type |= NLP_FABRIC; lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE); - lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI, + lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE | LOG_DISCOVERY, "0003 rpi:%x DID:%x flg:%x %d map%x x%px\n", ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag, kref_read(&ndlp->kref), @@ -4575,8 +4571,10 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, return ndlp; free_rpi: - if (phba->sli_rev == LPFC_SLI_REV4) + if (phba->sli_rev == LPFC_SLI_REV4) { lpfc_sli4_free_rpi(vport->phba, rpi); + ndlp->nlp_rpi = LPFC_RPI_ALLOC_ERROR; + } return NULL; } @@ -4835,12 +4833,51 @@ lpfc_nlp_logo_unreg(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) if (ndlp->nlp_flag & NLP_RELEASE_RPI) { lpfc_sli4_free_rpi(vport->phba, ndlp->nlp_rpi); ndlp->nlp_flag &= ~NLP_RELEASE_RPI; + ndlp->nlp_rpi = LPFC_RPI_ALLOC_ERROR; } ndlp->nlp_flag &= ~NLP_UNREG_INP; } } /* + * Sets the mailbox completion handler to be used for the + * unreg_rpi command. The handler varies based on the state of + * the port and what will be happening to the rpi next. + */ +static void +lpfc_set_unreg_login_mbx_cmpl(struct lpfc_hba *phba, struct lpfc_vport *vport, + struct lpfc_nodelist *ndlp, LPFC_MBOXQ_t *mbox) +{ + unsigned long iflags; + + if (ndlp->nlp_flag & NLP_ISSUE_LOGO) { + mbox->ctx_ndlp = ndlp; + mbox->mbox_cmpl = lpfc_nlp_logo_unreg; + + } else if (phba->sli_rev == LPFC_SLI_REV4 && + (!(vport->load_flag & FC_UNLOADING)) && + (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) >= + LPFC_SLI_INTF_IF_TYPE_2) && + (kref_read(&ndlp->kref) > 0)) { + mbox->ctx_ndlp = lpfc_nlp_get(ndlp); + mbox->mbox_cmpl = lpfc_sli4_unreg_rpi_cmpl_clr; + } else { + if (vport->load_flag & FC_UNLOADING) { + if (phba->sli_rev == LPFC_SLI_REV4) { + spin_lock_irqsave(&vport->phba->ndlp_lock, + iflags); + ndlp->nlp_flag |= NLP_RELEASE_RPI; + spin_unlock_irqrestore(&vport->phba->ndlp_lock, + iflags); + } + lpfc_nlp_get(ndlp); + } + mbox->ctx_ndlp = ndlp; + mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl; + } +} + +/* * Free rpi associated with LPFC_NODELIST entry. * This routine is called from lpfc_freenode(), when we are removing * a LPFC_NODELIST entry. It is also called if the driver initiates a @@ -4860,7 +4897,8 @@ lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) if (ndlp->nlp_flag & NLP_RPI_REGISTERED || ndlp->nlp_flag & NLP_REG_LOGIN_SEND) { if (ndlp->nlp_flag & NLP_REG_LOGIN_SEND) - lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI, + lpfc_printf_vlog(vport, KERN_INFO, + LOG_NODE | LOG_DISCOVERY, "3366 RPI x%x needs to be " "unregistered nlp_flag x%x " "did x%x\n", @@ -4871,7 +4909,8 @@ lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) * no need to queue up another one. */ if (ndlp->nlp_flag & NLP_UNREG_INP) { - lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, + lpfc_printf_vlog(vport, KERN_INFO, + LOG_NODE | LOG_DISCOVERY, "1436 unreg_rpi SKIP UNREG x%x on " "NPort x%x deferred x%x flg x%x " "Data: x%px\n", @@ -4890,39 +4929,19 @@ lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) lpfc_unreg_login(phba, vport->vpi, rpi, mbox); mbox->vport = vport; - if (ndlp->nlp_flag & NLP_ISSUE_LOGO) { - mbox->ctx_ndlp = ndlp; - mbox->mbox_cmpl = lpfc_nlp_logo_unreg; - } else { - if (phba->sli_rev == LPFC_SLI_REV4 && - (!(vport->load_flag & FC_UNLOADING)) && - (bf_get(lpfc_sli_intf_if_type, - &phba->sli4_hba.sli_intf) >= - LPFC_SLI_INTF_IF_TYPE_2) && - (kref_read(&ndlp->kref) > 0)) { - mbox->ctx_ndlp = lpfc_nlp_get(ndlp); - mbox->mbox_cmpl = - lpfc_sli4_unreg_rpi_cmpl_clr; - /* - * accept PLOGIs after unreg_rpi_cmpl - */ - acc_plogi = 0; - } else if (vport->load_flag & FC_UNLOADING) { - mbox->ctx_ndlp = NULL; - mbox->mbox_cmpl = - lpfc_sli_def_mbox_cmpl; - } else { - mbox->ctx_ndlp = ndlp; - mbox->mbox_cmpl = - lpfc_sli_def_mbox_cmpl; - } - } + lpfc_set_unreg_login_mbx_cmpl(phba, vport, ndlp, mbox); + if (mbox->mbox_cmpl == lpfc_sli4_unreg_rpi_cmpl_clr) + /* + * accept PLOGIs after unreg_rpi_cmpl + */ + acc_plogi = 0; if (((ndlp->nlp_DID & Fabric_DID_MASK) != Fabric_DID_MASK) && (!(vport->fc_flag & FC_OFFLINE_MODE))) ndlp->nlp_flag |= NLP_UNREG_INP; - lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, + lpfc_printf_vlog(vport, KERN_INFO, + LOG_NODE | LOG_DISCOVERY, "1433 unreg_rpi UNREG x%x on " "NPort x%x deferred flg x%x " "Data:x%px\n", @@ -5057,6 +5076,7 @@ lpfc_cleanup_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) struct lpfc_hba *phba = vport->phba; LPFC_MBOXQ_t *mb, *nextmb; struct lpfc_dmabuf *mp; + unsigned long iflags; /* Cleanup node for NPort <nlp_DID> */ lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE, @@ -5138,8 +5158,20 @@ lpfc_cleanup_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) lpfc_cleanup_vports_rrqs(vport, ndlp); if (phba->sli_rev == LPFC_SLI_REV4) ndlp->nlp_flag |= NLP_RELEASE_RPI; - lpfc_unreg_rpi(vport, ndlp); - + if (!lpfc_unreg_rpi(vport, ndlp)) { + /* Clean up unregistered and non freed rpis */ + if ((ndlp->nlp_flag & NLP_RELEASE_RPI) && + !(ndlp->nlp_rpi == LPFC_RPI_ALLOC_ERROR)) { + lpfc_sli4_free_rpi(vport->phba, + ndlp->nlp_rpi); + spin_lock_irqsave(&vport->phba->ndlp_lock, + iflags); + ndlp->nlp_flag &= ~NLP_RELEASE_RPI; + ndlp->nlp_rpi = LPFC_RPI_ALLOC_ERROR; + spin_unlock_irqrestore(&vport->phba->ndlp_lock, + iflags); + } + } return 0; } @@ -5165,8 +5197,10 @@ lpfc_nlp_remove(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) /* For this case we need to cleanup the default rpi * allocated by the firmware. */ - lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE, - "0005 rpi:%x DID:%x flg:%x %d map:%x x%px\n", + lpfc_printf_vlog(vport, KERN_INFO, + LOG_NODE | LOG_DISCOVERY, + "0005 Cleanup Default rpi:x%x DID:x%x flg:x%x " + "ref %d map:x%x ndlp x%px\n", ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag, kref_read(&ndlp->kref), ndlp->nlp_usg_map, ndlp); @@ -5203,8 +5237,9 @@ lpfc_nlp_remove(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) */ lpfc_printf_vlog(vport, KERN_WARNING, LOG_NODE, "0940 removed node x%px DID x%x " - " rport not null x%px\n", - ndlp, ndlp->nlp_DID, ndlp->rport); + "rpi %d rport not null x%px\n", + ndlp, ndlp->nlp_DID, ndlp->nlp_rpi, + ndlp->rport); rport = ndlp->rport; rdata = rport->dd_data; rdata->pnode = NULL; @@ -5362,6 +5397,13 @@ lpfc_setup_disc_node(struct lpfc_vport *vport, uint32_t did) if (!ndlp) return NULL; lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); + + lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, + "6453 Setup New Node 2B_DISC x%x " + "Data:x%x x%x x%x\n", + ndlp->nlp_DID, ndlp->nlp_flag, + ndlp->nlp_state, vport->fc_flag); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_2B_DISC; spin_unlock_irq(shost->host_lock); @@ -5375,6 +5417,12 @@ lpfc_setup_disc_node(struct lpfc_vport *vport, uint32_t did) "0014 Could not enable ndlp\n"); return NULL; } + lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, + "6454 Setup Enabled Node 2B_DISC x%x " + "Data:x%x x%x x%x\n", + ndlp->nlp_DID, ndlp->nlp_flag, + ndlp->nlp_state, vport->fc_flag); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_2B_DISC; spin_unlock_irq(shost->host_lock); @@ -5394,6 +5442,12 @@ lpfc_setup_disc_node(struct lpfc_vport *vport, uint32_t did) */ lpfc_cancel_retry_delay_tmo(vport, ndlp); + lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, + "6455 Setup RSCN Node 2B_DISC x%x " + "Data:x%x x%x x%x\n", + ndlp->nlp_DID, ndlp->nlp_flag, + ndlp->nlp_state, vport->fc_flag); + /* NVME Target mode waits until rport is known to be * impacted by the RSCN before it transitions. No * active management - just go to NPR provided the @@ -5405,15 +5459,32 @@ lpfc_setup_disc_node(struct lpfc_vport *vport, uint32_t did) /* If we've already received a PLOGI from this NPort * we don't need to try to discover it again. */ - if (ndlp->nlp_flag & NLP_RCV_PLOGI) + if (ndlp->nlp_flag & NLP_RCV_PLOGI && + !(ndlp->nlp_type & + (NLP_FCP_TARGET | NLP_NVME_TARGET))) return NULL; + ndlp->nlp_prev_state = ndlp->nlp_state; + lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_2B_DISC; spin_unlock_irq(shost->host_lock); - } else + } else { + lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, + "6456 Skip Setup RSCN Node x%x " + "Data:x%x x%x x%x\n", + ndlp->nlp_DID, ndlp->nlp_flag, + ndlp->nlp_state, vport->fc_flag); ndlp = NULL; + } } else { + lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, + "6457 Setup Active Node 2B_DISC x%x " + "Data:x%x x%x x%x\n", + ndlp->nlp_DID, ndlp->nlp_flag, + ndlp->nlp_state, vport->fc_flag); + /* If the initiator received a PLOGI from this NPort or if the * initiator is already in the process of discovery on it, * there's no need to try to discover it again. @@ -5565,10 +5636,10 @@ lpfc_disc_start(struct lpfc_vport *vport) /* Start Discovery state <hba_state> */ lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, - "0202 Start Discovery hba state x%x " - "Data: x%x x%x x%x\n", + "0202 Start Discovery port state x%x " + "flg x%x Data: x%x x%x x%x\n", vport->port_state, vport->fc_flag, vport->fc_plogi_cnt, - vport->fc_adisc_cnt); + vport->fc_adisc_cnt, vport->fc_npr_cnt); /* First do ADISCs - if any */ num_sent = lpfc_els_disc_adisc(vport); @@ -5996,7 +6067,7 @@ lpfc_mbx_cmpl_fdmi_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) ndlp->nlp_flag |= NLP_RPI_REGISTERED; ndlp->nlp_type |= NLP_FABRIC; lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE); - lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI, + lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE | LOG_DISCOVERY, "0004 rpi:%x DID:%x flg:%x %d map:%x x%px\n", ndlp->nlp_rpi, ndlp->nlp_DID, ndlp->nlp_flag, kref_read(&ndlp->kref), @@ -6185,12 +6256,12 @@ lpfc_nlp_init(struct lpfc_vport *vport, uint32_t did) INIT_LIST_HEAD(&ndlp->nlp_listp); if (vport->phba->sli_rev == LPFC_SLI_REV4) { ndlp->nlp_rpi = rpi; - lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE, - "0007 rpi:%x DID:%x flg:%x refcnt:%d " - "map:%x x%px\n", ndlp->nlp_rpi, ndlp->nlp_DID, - ndlp->nlp_flag, - kref_read(&ndlp->kref), - ndlp->nlp_usg_map, ndlp); + lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE | LOG_DISCOVERY, + "0007 Init New ndlp x%px, rpi:x%x DID:%x " + "flg:x%x refcnt:%d map:x%x\n", + ndlp, ndlp->nlp_rpi, ndlp->nlp_DID, + ndlp->nlp_flag, kref_read(&ndlp->kref), + ndlp->nlp_usg_map); ndlp->active_rrqs_xri_bitmap = mempool_alloc(vport->phba->active_rrq_pool, @@ -6419,7 +6490,8 @@ lpfc_fcf_inuse(struct lpfc_hba *phba) goto out; } else if (ndlp->nlp_flag & NLP_RPI_REGISTERED) { ret = 1; - lpfc_printf_log(phba, KERN_INFO, LOG_ELS, + lpfc_printf_log(phba, KERN_INFO, + LOG_NODE | LOG_DISCOVERY, "2624 RPI %x DID %x flag %x " "still logged in\n", ndlp->nlp_rpi, ndlp->nlp_DID, diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index bd533475c86a..25cdcbc2b02f 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -210,7 +210,6 @@ struct lpfc_sli_intf { #define LPFC_MAX_IMAX 5000000 #define LPFC_DEF_IMAX 0 -#define LPFC_IMAX_THRESHOLD 1000 #define LPFC_MAX_AUTO_EQ_DELAY 120 #define LPFC_EQ_DELAY_STEP 15 #define LPFC_EQD_ISR_TRIGGER 20000 @@ -2320,6 +2319,7 @@ struct lpfc_mbx_redisc_fcf_tbl { #define ADD_STATUS_OPERATION_ALREADY_ACTIVE 0x67 #define ADD_STATUS_FW_NOT_SUPPORTED 0xEB #define ADD_STATUS_INVALID_REQUEST 0x4B +#define ADD_STATUS_FW_DOWNLOAD_HW_DISABLED 0x58 struct lpfc_mbx_sli4_config { struct mbox_header header; @@ -2809,6 +2809,15 @@ struct lpfc_mbx_read_config { #define lpfc_mbx_rd_conf_trunk_SHIFT 12 #define lpfc_mbx_rd_conf_trunk_MASK 0x0000000F #define lpfc_mbx_rd_conf_trunk_WORD word2 +#define lpfc_mbx_rd_conf_pt_SHIFT 20 +#define lpfc_mbx_rd_conf_pt_MASK 0x00000003 +#define lpfc_mbx_rd_conf_pt_WORD word2 +#define lpfc_mbx_rd_conf_tf_SHIFT 22 +#define lpfc_mbx_rd_conf_tf_MASK 0x00000001 +#define lpfc_mbx_rd_conf_tf_WORD word2 +#define lpfc_mbx_rd_conf_ptv_SHIFT 23 +#define lpfc_mbx_rd_conf_ptv_MASK 0x00000001 +#define lpfc_mbx_rd_conf_ptv_WORD word2 #define lpfc_mbx_rd_conf_topology_SHIFT 24 #define lpfc_mbx_rd_conf_topology_MASK 0x000000FF #define lpfc_mbx_rd_conf_topology_WORD word2 @@ -3479,6 +3488,9 @@ struct lpfc_sli4_parameters { #define cfg_bv1s_SHIFT 10 #define cfg_bv1s_MASK 0x00000001 #define cfg_bv1s_WORD word19 +#define cfg_pvl_SHIFT 13 +#define cfg_pvl_MASK 0x00000001 +#define cfg_pvl_WORD word19 #define cfg_nsler_SHIFT 12 #define cfg_nsler_MASK 0x00000001 @@ -3518,6 +3530,7 @@ struct lpfc_sli4_parameters { #define LPFC_SET_UE_RECOVERY 0x10 #define LPFC_SET_MDS_DIAGS 0x11 +#define LPFC_SET_DUAL_DUMP 0x1e struct lpfc_mbx_set_feature { struct mbox_header header; uint32_t feature; @@ -3532,6 +3545,15 @@ struct lpfc_mbx_set_feature { #define lpfc_mbx_set_feature_mds_deep_loopbk_SHIFT 1 #define lpfc_mbx_set_feature_mds_deep_loopbk_MASK 0x00000001 #define lpfc_mbx_set_feature_mds_deep_loopbk_WORD word6 +#define lpfc_mbx_set_feature_dd_SHIFT 0 +#define lpfc_mbx_set_feature_dd_MASK 0x00000001 +#define lpfc_mbx_set_feature_dd_WORD word6 +#define lpfc_mbx_set_feature_ddquery_SHIFT 1 +#define lpfc_mbx_set_feature_ddquery_MASK 0x00000001 +#define lpfc_mbx_set_feature_ddquery_WORD word6 +#define LPFC_DISABLE_DUAL_DUMP 0 +#define LPFC_ENABLE_DUAL_DUMP 1 +#define LPFC_QUERY_OP_DUAL_DUMP 2 uint32_t word7; #define lpfc_mbx_set_feature_UERP_SHIFT 0 #define lpfc_mbx_set_feature_UERP_MASK 0x0000ffff @@ -4261,6 +4283,8 @@ struct lpfc_acqe_sli { #define LPFC_SLI_EVENT_TYPE_DIAG_DUMP 0x5 #define LPFC_SLI_EVENT_TYPE_MISCONFIGURED 0x9 #define LPFC_SLI_EVENT_TYPE_REMOTE_DPORT 0xA +#define LPFC_SLI_EVENT_TYPE_MISCONF_FAWWN 0xF +#define LPFC_SLI_EVENT_TYPE_EEPROM_FAILURE 0x10 }; /* @@ -4659,6 +4683,7 @@ struct create_xri_wqe { uint32_t rsvd_12_15[4]; /* word 12-15 */ }; +#define INHIBIT_ABORT 1 #define T_REQUEST_TAG 3 #define T_XRI_TAG 1 @@ -4807,8 +4832,8 @@ union lpfc_wqe128 { struct send_frame_wqe send_frame; }; -#define MAGIC_NUMER_G6 0xFEAA0003 -#define MAGIC_NUMER_G7 0xFEAA0005 +#define MAGIC_NUMBER_G6 0xFEAA0003 +#define MAGIC_NUMBER_G7 0xFEAA0005 struct lpfc_grp_hdr { uint32_t size; diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index e8813d26e594..dc6f7c4b54c6 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -40,6 +40,8 @@ #include <linux/irq.h> #include <linux/bitops.h> #include <linux/crash_dump.h> +#include <linux/cpu.h> +#include <linux/cpuhotplug.h> #include <scsi/scsi.h> #include <scsi/scsi_device.h> @@ -66,9 +68,13 @@ #include "lpfc_version.h" #include "lpfc_ids.h" +static enum cpuhp_state lpfc_cpuhp_state; /* Used when mapping IRQ vectors in a driver centric manner */ static uint32_t lpfc_present_cpu; +static void __lpfc_cpuhp_remove(struct lpfc_hba *phba); +static void lpfc_cpuhp_remove(struct lpfc_hba *phba); +static void lpfc_cpuhp_add(struct lpfc_hba *phba); static void lpfc_get_hba_model_desc(struct lpfc_hba *, uint8_t *, uint8_t *); static int lpfc_post_rcv_buf(struct lpfc_hba *); static int lpfc_sli4_queue_verify(struct lpfc_hba *); @@ -1235,10 +1241,9 @@ lpfc_hb_eq_delay_work(struct work_struct *work) struct lpfc_hba, eq_delay_work); struct lpfc_eq_intr_info *eqi, *eqi_new; struct lpfc_queue *eq, *eq_next; - unsigned char *eqcnt = NULL; + unsigned char *ena_delay = NULL; uint32_t usdelay; int i; - bool update = false; if (!phba->cfg_auto_imax || phba->pport->load_flag & FC_UNLOADING) return; @@ -1247,44 +1252,36 @@ lpfc_hb_eq_delay_work(struct work_struct *work) phba->pport->fc_flag & FC_OFFLINE_MODE) goto requeue; - eqcnt = kcalloc(num_possible_cpus(), sizeof(unsigned char), - GFP_KERNEL); - if (!eqcnt) + ena_delay = kcalloc(phba->sli4_hba.num_possible_cpu, sizeof(*ena_delay), + GFP_KERNEL); + if (!ena_delay) goto requeue; - if (phba->cfg_irq_chann > 1) { - /* Loop thru all IRQ vectors */ - for (i = 0; i < phba->cfg_irq_chann; i++) { - /* Get the EQ corresponding to the IRQ vector */ - eq = phba->sli4_hba.hba_eq_hdl[i].eq; - if (!eq) - continue; - if (eq->q_mode) { - update = true; - break; - } - if (eqcnt[eq->last_cpu] < 2) - eqcnt[eq->last_cpu]++; + for (i = 0; i < phba->cfg_irq_chann; i++) { + /* Get the EQ corresponding to the IRQ vector */ + eq = phba->sli4_hba.hba_eq_hdl[i].eq; + if (!eq) + continue; + if (eq->q_mode || eq->q_flag & HBA_EQ_DELAY_CHK) { + eq->q_flag &= ~HBA_EQ_DELAY_CHK; + ena_delay[eq->last_cpu] = 1; } - } else - update = true; + } for_each_present_cpu(i) { eqi = per_cpu_ptr(phba->sli4_hba.eq_info, i); - if (!update && eqcnt[i] < 2) { - eqi->icnt = 0; - continue; + if (ena_delay[i]) { + usdelay = (eqi->icnt >> 10) * LPFC_EQ_DELAY_STEP; + if (usdelay > LPFC_MAX_AUTO_EQ_DELAY) + usdelay = LPFC_MAX_AUTO_EQ_DELAY; + } else { + usdelay = 0; } - usdelay = (eqi->icnt / LPFC_IMAX_THRESHOLD) * - LPFC_EQ_DELAY_STEP; - if (usdelay > LPFC_MAX_AUTO_EQ_DELAY) - usdelay = LPFC_MAX_AUTO_EQ_DELAY; - eqi->icnt = 0; list_for_each_entry_safe(eq, eq_next, &eqi->list, cpu_list) { - if (eq->last_cpu != i) { + if (unlikely(eq->last_cpu != i)) { eqi_new = per_cpu_ptr(phba->sli4_hba.eq_info, eq->last_cpu); list_move_tail(&eq->cpu_list, &eqi_new->list); @@ -1296,7 +1293,7 @@ lpfc_hb_eq_delay_work(struct work_struct *work) } } - kfree(eqcnt); + kfree(ena_delay); requeue: queue_delayed_work(phba->wq, &phba->eq_delay_work, @@ -3053,11 +3050,12 @@ lpfc_sli4_node_prep(struct lpfc_hba *phba) continue; } ndlp->nlp_rpi = rpi; - lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE, - "0009 rpi:%x DID:%x " - "flg:%x map:%x x%px\n", ndlp->nlp_rpi, - ndlp->nlp_DID, ndlp->nlp_flag, - ndlp->nlp_usg_map, ndlp); + lpfc_printf_vlog(ndlp->vport, KERN_INFO, + LOG_NODE | LOG_DISCOVERY, + "0009 Assign RPI x%x to ndlp x%px " + "DID:x%06x flg:x%x map:x%x\n", + ndlp->nlp_rpi, ndlp, ndlp->nlp_DID, + ndlp->nlp_flag, ndlp->nlp_usg_map); } } lpfc_destroy_vport_work_array(phba, vports); @@ -3387,6 +3385,8 @@ lpfc_online(struct lpfc_hba *phba) if (phba->cfg_xri_rebalancing) lpfc_create_multixri_pools(phba); + lpfc_cpuhp_add(phba); + lpfc_unblock_mgmt_io(phba); return 0; } @@ -3453,10 +3453,15 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action) list_for_each_entry_safe(ndlp, next_ndlp, &vports[i]->fc_nodes, nlp_listp) { - if (!NLP_CHK_NODE_ACT(ndlp)) - continue; - if (ndlp->nlp_state == NLP_STE_UNUSED_NODE) + if ((!NLP_CHK_NODE_ACT(ndlp)) || + ndlp->nlp_state == NLP_STE_UNUSED_NODE) { + /* Driver must assume RPI is invalid for + * any unused or inactive node. + */ + ndlp->nlp_rpi = LPFC_RPI_ALLOC_ERROR; continue; + } + if (ndlp->nlp_type & NLP_FABRIC) { lpfc_disc_state_machine(vports[i], ndlp, NULL, NLP_EVT_DEVICE_RECOVERY); @@ -3472,16 +3477,16 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action) * comes back online. */ if (phba->sli_rev == LPFC_SLI_REV4) { - lpfc_printf_vlog(ndlp->vport, - KERN_INFO, LOG_NODE, - "0011 lpfc_offline: " - "ndlp:x%px did %x " - "usgmap:x%x rpi:%x\n", - ndlp, ndlp->nlp_DID, - ndlp->nlp_usg_map, - ndlp->nlp_rpi); - + lpfc_printf_vlog(ndlp->vport, KERN_INFO, + LOG_NODE | LOG_DISCOVERY, + "0011 Free RPI x%x on " + "ndlp:x%px did x%x " + "usgmap:x%x\n", + ndlp->nlp_rpi, ndlp, + ndlp->nlp_DID, + ndlp->nlp_usg_map); lpfc_sli4_free_rpi(phba, ndlp->nlp_rpi); + ndlp->nlp_rpi = LPFC_RPI_ALLOC_ERROR; } lpfc_unreg_rpi(vports[i], ndlp); } @@ -3545,6 +3550,7 @@ lpfc_offline(struct lpfc_hba *phba) spin_unlock_irq(shost->host_lock); } lpfc_destroy_vport_work_array(phba, vports); + __lpfc_cpuhp_remove(phba); if (phba->cfg_xri_rebalancing) lpfc_destroy_multixri_pools(phba); @@ -5283,10 +5289,10 @@ lpfc_sli4_async_sli_evt(struct lpfc_hba *phba, struct lpfc_acqe_sli *acqe_sli) evt_type = bf_get(lpfc_trailer_type, acqe_sli); lpfc_printf_log(phba, KERN_INFO, LOG_SLI, - "2901 Async SLI event - Event Data1:x%08x Event Data2:" - "x%08x SLI Event Type:%d\n", + "2901 Async SLI event - Type:%d, Event Data: x%08x " + "x%08x x%08x x%08x\n", evt_type, acqe_sli->event_data1, acqe_sli->event_data2, - evt_type); + acqe_sli->reserved, acqe_sli->trailer); port_name = phba->Port[0]; if (port_name == 0x00) @@ -5433,11 +5439,26 @@ lpfc_sli4_async_sli_evt(struct lpfc_hba *phba, struct lpfc_acqe_sli *acqe_sli) "Event Data1:x%08x Event Data2: x%08x\n", acqe_sli->event_data1, acqe_sli->event_data2); break; + case LPFC_SLI_EVENT_TYPE_MISCONF_FAWWN: + /* Misconfigured WWN. Reports that the SLI Port is configured + * to use FA-WWN, but the attached device doesn’t support it. + * No driver action is required. + * Event Data1 - N.A, Event Data2 - N.A + */ + lpfc_log_msg(phba, KERN_WARNING, LOG_SLI, + "2699 Misconfigured FA-WWN - Attached device does " + "not support FA-WWN\n"); + break; + case LPFC_SLI_EVENT_TYPE_EEPROM_FAILURE: + /* EEPROM failure. No driver action is required */ + lpfc_printf_log(phba, KERN_WARNING, LOG_SLI, + "2518 EEPROM failure - " + "Event Data1: x%08x Event Data2: x%08x\n", + acqe_sli->event_data1, acqe_sli->event_data2); + break; default: lpfc_printf_log(phba, KERN_INFO, LOG_SLI, - "3193 Async SLI event - Event Data1:x%08x Event Data2:" - "x%08x SLI Event Type:%d\n", - acqe_sli->event_data1, acqe_sli->event_data2, + "3193 Unrecognized SLI event, type: 0x%x", evt_type); break; } @@ -5976,6 +5997,29 @@ static void lpfc_log_intr_mode(struct lpfc_hba *phba, uint32_t intr_mode) } /** + * lpfc_cpumask_of_node_init - initalizes cpumask of phba's NUMA node + * @phba: Pointer to HBA context object. + * + **/ +static void +lpfc_cpumask_of_node_init(struct lpfc_hba *phba) +{ + unsigned int cpu, numa_node; + struct cpumask *numa_mask = &phba->sli4_hba.numa_mask; + + cpumask_clear(numa_mask); + + /* Check if we're a NUMA architecture */ + numa_node = dev_to_node(&phba->pcidev->dev); + if (numa_node == NUMA_NO_NODE) + return; + + for_each_possible_cpu(cpu) + if (cpu_to_node(cpu) == numa_node) + cpumask_set_cpu(cpu, numa_mask); +} + +/** * lpfc_enable_pci_dev - Enable a generic PCI device. * @phba: pointer to lpfc hba data structure. * @@ -6418,6 +6462,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) phba->sli4_hba.num_present_cpu = lpfc_present_cpu; phba->sli4_hba.num_possible_cpu = num_possible_cpus(); phba->sli4_hba.curr_disp_cpu = 0; + lpfc_cpumask_of_node_init(phba); /* Get all the module params for configuring this host */ lpfc_get_cfgparam(phba); @@ -6953,6 +6998,7 @@ lpfc_sli4_driver_resource_unset(struct lpfc_hba *phba) phba->sli4_hba.num_possible_cpu = 0; phba->sli4_hba.num_present_cpu = 0; phba->sli4_hba.curr_disp_cpu = 0; + cpumask_clear(&phba->sli4_hba.numa_mask); /* Free memory allocated for fast-path work queue handles */ kfree(phba->sli4_hba.hba_eq_hdl); @@ -7126,7 +7172,7 @@ lpfc_init_iocb_list(struct lpfc_hba *phba, int iocb_count) if (iocbq_entry == NULL) { printk(KERN_ERR "%s: only allocated %d iocbs of " "expected %d count. Unloading driver.\n", - __func__, i, LPFC_IOCB_LIST_CNT); + __func__, i, iocb_count); goto out_free_iocbq; } @@ -7545,18 +7591,10 @@ lpfc_create_shost(struct lpfc_hba *phba) if (phba->nvmet_support) { /* Only 1 vport (pport) will support NVME target */ - if (phba->txrdy_payload_pool == NULL) { - phba->txrdy_payload_pool = dma_pool_create( - "txrdy_pool", &phba->pcidev->dev, - TXRDY_PAYLOAD_LEN, 16, 0); - if (phba->txrdy_payload_pool) { - phba->targetport = NULL; - phba->cfg_enable_fc4_type = LPFC_ENABLE_NVME; - lpfc_printf_log(phba, KERN_INFO, - LOG_INIT | LOG_NVME_DISC, - "6076 NVME Target Found\n"); - } - } + phba->targetport = NULL; + phba->cfg_enable_fc4_type = LPFC_ENABLE_NVME; + lpfc_printf_log(phba, KERN_INFO, LOG_INIT | LOG_NVME_DISC, + "6076 NVME Target Found\n"); } lpfc_debugfs_initialize(vport); @@ -8235,6 +8273,94 @@ lpfc_destroy_bootstrap_mbox(struct lpfc_hba *phba) memset(&phba->sli4_hba.bmbx, 0, sizeof(struct lpfc_bmbx)); } +static const char * const lpfc_topo_to_str[] = { + "Loop then P2P", + "Loopback", + "P2P Only", + "Unsupported", + "Loop Only", + "Unsupported", + "P2P then Loop", +}; + +/** + * lpfc_map_topology - Map the topology read from READ_CONFIG + * @phba: pointer to lpfc hba data structure. + * @rdconf: pointer to read config data + * + * This routine is invoked to map the topology values as read + * from the read config mailbox command. If the persistent + * topology feature is supported, the firmware will provide the + * saved topology information to be used in INIT_LINK + * + **/ +#define LINK_FLAGS_DEF 0x0 +#define LINK_FLAGS_P2P 0x1 +#define LINK_FLAGS_LOOP 0x2 +static void +lpfc_map_topology(struct lpfc_hba *phba, struct lpfc_mbx_read_config *rd_config) +{ + u8 ptv, tf, pt; + + ptv = bf_get(lpfc_mbx_rd_conf_ptv, rd_config); + tf = bf_get(lpfc_mbx_rd_conf_tf, rd_config); + pt = bf_get(lpfc_mbx_rd_conf_pt, rd_config); + + lpfc_printf_log(phba, KERN_INFO, LOG_SLI, + "2027 Read Config Data : ptv:0x%x, tf:0x%x pt:0x%x", + ptv, tf, pt); + if (!ptv) { + lpfc_printf_log(phba, KERN_WARNING, LOG_SLI, + "2019 FW does not support persistent topology " + "Using driver parameter defined value [%s]", + lpfc_topo_to_str[phba->cfg_topology]); + return; + } + /* FW supports persistent topology - override module parameter value */ + phba->hba_flag |= HBA_PERSISTENT_TOPO; + switch (phba->pcidev->device) { + case PCI_DEVICE_ID_LANCER_G7_FC: + if (tf || (pt == LINK_FLAGS_LOOP)) { + /* Invalid values from FW - use driver params */ + phba->hba_flag &= ~HBA_PERSISTENT_TOPO; + } else { + /* Prism only supports PT2PT topology */ + phba->cfg_topology = FLAGS_TOPOLOGY_MODE_PT_PT; + } + break; + case PCI_DEVICE_ID_LANCER_G6_FC: + if (!tf) { + phba->cfg_topology = ((pt == LINK_FLAGS_LOOP) + ? FLAGS_TOPOLOGY_MODE_LOOP + : FLAGS_TOPOLOGY_MODE_PT_PT); + } else { + phba->hba_flag &= ~HBA_PERSISTENT_TOPO; + } + break; + default: /* G5 */ + if (tf) { + /* If topology failover set - pt is '0' or '1' */ + phba->cfg_topology = (pt ? FLAGS_TOPOLOGY_MODE_PT_LOOP : + FLAGS_TOPOLOGY_MODE_LOOP_PT); + } else { + phba->cfg_topology = ((pt == LINK_FLAGS_P2P) + ? FLAGS_TOPOLOGY_MODE_PT_PT + : FLAGS_TOPOLOGY_MODE_LOOP); + } + break; + } + if (phba->hba_flag & HBA_PERSISTENT_TOPO) { + lpfc_printf_log(phba, KERN_INFO, LOG_SLI, + "2020 Using persistent topology value [%s]", + lpfc_topo_to_str[phba->cfg_topology]); + } else { + lpfc_printf_log(phba, KERN_WARNING, LOG_SLI, + "2021 Invalid topology values from FW " + "Using driver parameter defined value [%s]", + lpfc_topo_to_str[phba->cfg_topology]); + } +} + /** * lpfc_sli4_read_config - Get the config parameters. * @phba: pointer to lpfc hba data structure. @@ -8346,6 +8472,7 @@ lpfc_sli4_read_config(struct lpfc_hba *phba) phba->max_vpi = (phba->sli4_hba.max_cfg_param.max_vpi > 0) ? (phba->sli4_hba.max_cfg_param.max_vpi - 1) : 0; phba->max_vports = phba->max_vpi; + lpfc_map_topology(phba, rd_config); lpfc_printf_log(phba, KERN_INFO, LOG_SLI, "2003 cfg params Extents? %d " "XRI(B:%d M:%d), " @@ -8619,8 +8746,8 @@ lpfc_sli4_queue_verify(struct lpfc_hba *phba) */ if (phba->nvmet_support) { - if (phba->cfg_irq_chann < phba->cfg_nvmet_mrq) - phba->cfg_nvmet_mrq = phba->cfg_irq_chann; + if (phba->cfg_hdw_queue < phba->cfg_nvmet_mrq) + phba->cfg_nvmet_mrq = phba->cfg_hdw_queue; if (phba->cfg_nvmet_mrq > LPFC_NVMET_MRQ_MAX) phba->cfg_nvmet_mrq = LPFC_NVMET_MRQ_MAX; } @@ -9160,6 +9287,8 @@ lpfc_sli4_queue_destroy(struct lpfc_hba *phba) } spin_unlock_irq(&phba->hbalock); + lpfc_sli4_cleanup_poll_list(phba); + /* Release HBA eqs */ if (phba->sli4_hba.hdwq) lpfc_sli4_release_hdwq(phba); @@ -10581,7 +10710,6 @@ lpfc_find_cpu_handle(struct lpfc_hba *phba, uint16_t id, int match) */ if ((match == LPFC_FIND_BY_EQ) && (cpup->flag & LPFC_CPU_FIRST_IRQ) && - (cpup->irq != LPFC_VECTOR_MAP_EMPTY) && (cpup->eq == id)) return cpu; @@ -10619,6 +10747,75 @@ lpfc_find_hyper(struct lpfc_hba *phba, int cpu, } #endif +/* + * lpfc_assign_eq_map_info - Assigns eq for vector_map structure + * @phba: pointer to lpfc hba data structure. + * @eqidx: index for eq and irq vector + * @flag: flags to set for vector_map structure + * @cpu: cpu used to index vector_map structure + * + * The routine assigns eq info into vector_map structure + */ +static inline void +lpfc_assign_eq_map_info(struct lpfc_hba *phba, uint16_t eqidx, uint16_t flag, + unsigned int cpu) +{ + struct lpfc_vector_map_info *cpup = &phba->sli4_hba.cpu_map[cpu]; + struct lpfc_hba_eq_hdl *eqhdl = lpfc_get_eq_hdl(eqidx); + + cpup->eq = eqidx; + cpup->flag |= flag; + + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "3336 Set Affinity: CPU %d irq %d eq %d flag x%x\n", + cpu, eqhdl->irq, cpup->eq, cpup->flag); +} + +/** + * lpfc_cpu_map_array_init - Initialize cpu_map structure + * @phba: pointer to lpfc hba data structure. + * + * The routine initializes the cpu_map array structure + */ +static void +lpfc_cpu_map_array_init(struct lpfc_hba *phba) +{ + struct lpfc_vector_map_info *cpup; + struct lpfc_eq_intr_info *eqi; + int cpu; + + for_each_possible_cpu(cpu) { + cpup = &phba->sli4_hba.cpu_map[cpu]; + cpup->phys_id = LPFC_VECTOR_MAP_EMPTY; + cpup->core_id = LPFC_VECTOR_MAP_EMPTY; + cpup->hdwq = LPFC_VECTOR_MAP_EMPTY; + cpup->eq = LPFC_VECTOR_MAP_EMPTY; + cpup->flag = 0; + eqi = per_cpu_ptr(phba->sli4_hba.eq_info, cpu); + INIT_LIST_HEAD(&eqi->list); + eqi->icnt = 0; + } +} + +/** + * lpfc_hba_eq_hdl_array_init - Initialize hba_eq_hdl structure + * @phba: pointer to lpfc hba data structure. + * + * The routine initializes the hba_eq_hdl array structure + */ +static void +lpfc_hba_eq_hdl_array_init(struct lpfc_hba *phba) +{ + struct lpfc_hba_eq_hdl *eqhdl; + int i; + + for (i = 0; i < phba->cfg_irq_chann; i++) { + eqhdl = lpfc_get_eq_hdl(i); + eqhdl->irq = LPFC_VECTOR_MAP_EMPTY; + eqhdl->phba = phba; + } +} + /** * lpfc_cpu_affinity_check - Check vector CPU affinity mappings * @phba: pointer to lpfc hba data structure. @@ -10637,22 +10834,10 @@ lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors) int max_core_id, min_core_id; struct lpfc_vector_map_info *cpup; struct lpfc_vector_map_info *new_cpup; - const struct cpumask *maskp; #ifdef CONFIG_X86 struct cpuinfo_x86 *cpuinfo; #endif - /* Init cpu_map array */ - for_each_possible_cpu(cpu) { - cpup = &phba->sli4_hba.cpu_map[cpu]; - cpup->phys_id = LPFC_VECTOR_MAP_EMPTY; - cpup->core_id = LPFC_VECTOR_MAP_EMPTY; - cpup->hdwq = LPFC_VECTOR_MAP_EMPTY; - cpup->eq = LPFC_VECTOR_MAP_EMPTY; - cpup->irq = LPFC_VECTOR_MAP_EMPTY; - cpup->flag = 0; - } - max_phys_id = 0; min_phys_id = LPFC_VECTOR_MAP_EMPTY; max_core_id = 0; @@ -10688,65 +10873,6 @@ lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors) min_core_id = cpup->core_id; } - for_each_possible_cpu(i) { - struct lpfc_eq_intr_info *eqi = - per_cpu_ptr(phba->sli4_hba.eq_info, i); - - INIT_LIST_HEAD(&eqi->list); - eqi->icnt = 0; - } - - /* This loop sets up all CPUs that are affinitized with a - * irq vector assigned to the driver. All affinitized CPUs - * will get a link to that vectors IRQ and EQ. - * - * NULL affinity mask handling: - * If irq count is greater than one, log an error message. - * If the null mask is received for the first irq, find the - * first present cpu, and assign the eq index to ensure at - * least one EQ is assigned. - */ - for (idx = 0; idx < phba->cfg_irq_chann; idx++) { - /* Get a CPU mask for all CPUs affinitized to this vector */ - maskp = pci_irq_get_affinity(phba->pcidev, idx); - if (!maskp) { - if (phba->cfg_irq_chann > 1) - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "3329 No affinity mask found " - "for vector %d (%d)\n", - idx, phba->cfg_irq_chann); - if (!idx) { - cpu = cpumask_first(cpu_present_mask); - cpup = &phba->sli4_hba.cpu_map[cpu]; - cpup->eq = idx; - cpup->irq = pci_irq_vector(phba->pcidev, idx); - cpup->flag |= LPFC_CPU_FIRST_IRQ; - } - break; - } - - i = 0; - /* Loop through all CPUs associated with vector idx */ - for_each_cpu_and(cpu, maskp, cpu_present_mask) { - /* Set the EQ index and IRQ for that vector */ - cpup = &phba->sli4_hba.cpu_map[cpu]; - cpup->eq = idx; - cpup->irq = pci_irq_vector(phba->pcidev, idx); - - /* If this is the first CPU thats assigned to this - * vector, set LPFC_CPU_FIRST_IRQ. - */ - if (!i) - cpup->flag |= LPFC_CPU_FIRST_IRQ; - i++; - - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "3336 Set Affinity: CPU %d " - "irq %d eq %d flag x%x\n", - cpu, cpup->irq, cpup->eq, cpup->flag); - } - } - /* After looking at each irq vector assigned to this pcidev, its * possible to see that not ALL CPUs have been accounted for. * Next we will set any unassigned (unaffinitized) cpu map @@ -10772,7 +10898,7 @@ lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors) for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) { new_cpup = &phba->sli4_hba.cpu_map[new_cpu]; if (!(new_cpup->flag & LPFC_CPU_MAP_UNASSIGN) && - (new_cpup->irq != LPFC_VECTOR_MAP_EMPTY) && + (new_cpup->eq != LPFC_VECTOR_MAP_EMPTY) && (new_cpup->phys_id == cpup->phys_id)) goto found_same; new_cpu = cpumask_next( @@ -10785,7 +10911,6 @@ lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors) found_same: /* We found a matching phys_id, so copy the IRQ info */ cpup->eq = new_cpup->eq; - cpup->irq = new_cpup->irq; /* Bump start_cpu to the next slot to minmize the * chance of having multiple unassigned CPU entries @@ -10797,9 +10922,10 @@ found_same: lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "3337 Set Affinity: CPU %d " - "irq %d from id %d same " + "eq %d from peer cpu %d same " "phys_id (%d)\n", - cpu, cpup->irq, new_cpu, cpup->phys_id); + cpu, cpup->eq, new_cpu, + cpup->phys_id); } } @@ -10823,7 +10949,7 @@ found_same: for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) { new_cpup = &phba->sli4_hba.cpu_map[new_cpu]; if (!(new_cpup->flag & LPFC_CPU_MAP_UNASSIGN) && - (new_cpup->irq != LPFC_VECTOR_MAP_EMPTY)) + (new_cpup->eq != LPFC_VECTOR_MAP_EMPTY)) goto found_any; new_cpu = cpumask_next( new_cpu, cpu_present_mask); @@ -10833,13 +10959,12 @@ found_same: /* We should never leave an entry unassigned */ lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "3339 Set Affinity: CPU %d " - "irq %d UNASSIGNED\n", - cpup->hdwq, cpup->irq); + "eq %d UNASSIGNED\n", + cpup->hdwq, cpup->eq); continue; found_any: /* We found an available entry, copy the IRQ info */ cpup->eq = new_cpup->eq; - cpup->irq = new_cpup->irq; /* Bump start_cpu to the next slot to minmize the * chance of having multiple unassigned CPU entries @@ -10851,8 +10976,8 @@ found_any: lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "3338 Set Affinity: CPU %d " - "irq %d from id %d (%d/%d)\n", - cpu, cpup->irq, new_cpu, + "eq %d from peer cpu %d (%d/%d)\n", + cpu, cpup->eq, new_cpu, new_cpup->phys_id, new_cpup->core_id); } } @@ -10873,11 +10998,11 @@ found_any: idx++; lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "3333 Set Affinity: CPU %d (phys %d core %d): " - "hdwq %d eq %d irq %d flg x%x\n", + "hdwq %d eq %d flg x%x\n", cpu, cpup->phys_id, cpup->core_id, - cpup->hdwq, cpup->eq, cpup->irq, cpup->flag); + cpup->hdwq, cpup->eq, cpup->flag); } - /* Finally we need to associate a hdwq with each cpu_map entry + /* Associate a hdwq with each cpu_map entry * This will be 1 to 1 - hdwq to cpu, unless there are less * hardware queues then CPUs. For that case we will just round-robin * the available hardware queues as they get assigned to CPUs. @@ -10951,9 +11076,26 @@ found_any: logit: lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "3335 Set Affinity: CPU %d (phys %d core %d): " - "hdwq %d eq %d irq %d flg x%x\n", + "hdwq %d eq %d flg x%x\n", cpu, cpup->phys_id, cpup->core_id, - cpup->hdwq, cpup->eq, cpup->irq, cpup->flag); + cpup->hdwq, cpup->eq, cpup->flag); + } + + /* + * Initialize the cpu_map slots for not-present cpus in case + * a cpu is hot-added. Perform a simple hdwq round robin assignment. + */ + idx = 0; + for_each_possible_cpu(cpu) { + cpup = &phba->sli4_hba.cpu_map[cpu]; + if (cpup->hdwq != LPFC_VECTOR_MAP_EMPTY) + continue; + + cpup->hdwq = idx++ % phba->cfg_hdw_queue; + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "3340 Set Affinity: not present " + "CPU %d hdwq %d\n", + cpu, cpup->hdwq); } /* The cpu_map array will be used later during initialization @@ -10963,11 +11105,280 @@ found_any: } /** + * lpfc_cpuhp_get_eq + * + * @phba: pointer to lpfc hba data structure. + * @cpu: cpu going offline + * @eqlist: + */ +static void +lpfc_cpuhp_get_eq(struct lpfc_hba *phba, unsigned int cpu, + struct list_head *eqlist) +{ + const struct cpumask *maskp; + struct lpfc_queue *eq; + cpumask_t tmp; + u16 idx; + + for (idx = 0; idx < phba->cfg_irq_chann; idx++) { + maskp = pci_irq_get_affinity(phba->pcidev, idx); + if (!maskp) + continue; + /* + * if irq is not affinitized to the cpu going + * then we don't need to poll the eq attached + * to it. + */ + if (!cpumask_and(&tmp, maskp, cpumask_of(cpu))) + continue; + /* get the cpus that are online and are affini- + * tized to this irq vector. If the count is + * more than 1 then cpuhp is not going to shut- + * down this vector. Since this cpu has not + * gone offline yet, we need >1. + */ + cpumask_and(&tmp, maskp, cpu_online_mask); + if (cpumask_weight(&tmp) > 1) + continue; + + /* Now that we have an irq to shutdown, get the eq + * mapped to this irq. Note: multiple hdwq's in + * the software can share an eq, but eventually + * only eq will be mapped to this vector + */ + eq = phba->sli4_hba.hba_eq_hdl[idx].eq; + list_add(&eq->_poll_list, eqlist); + } +} + +static void __lpfc_cpuhp_remove(struct lpfc_hba *phba) +{ + if (phba->sli_rev != LPFC_SLI_REV4) + return; + + cpuhp_state_remove_instance_nocalls(lpfc_cpuhp_state, + &phba->cpuhp); + /* + * unregistering the instance doesn't stop the polling + * timer. Wait for the poll timer to retire. + */ + synchronize_rcu(); + del_timer_sync(&phba->cpuhp_poll_timer); +} + +static void lpfc_cpuhp_remove(struct lpfc_hba *phba) +{ + if (phba->pport->fc_flag & FC_OFFLINE_MODE) + return; + + __lpfc_cpuhp_remove(phba); +} + +static void lpfc_cpuhp_add(struct lpfc_hba *phba) +{ + if (phba->sli_rev != LPFC_SLI_REV4) + return; + + rcu_read_lock(); + + if (!list_empty(&phba->poll_list)) { + timer_setup(&phba->cpuhp_poll_timer, lpfc_sli4_poll_hbtimer, 0); + mod_timer(&phba->cpuhp_poll_timer, + jiffies + msecs_to_jiffies(LPFC_POLL_HB)); + } + + rcu_read_unlock(); + + cpuhp_state_add_instance_nocalls(lpfc_cpuhp_state, + &phba->cpuhp); +} + +static int __lpfc_cpuhp_checks(struct lpfc_hba *phba, int *retval) +{ + if (phba->pport->load_flag & FC_UNLOADING) { + *retval = -EAGAIN; + return true; + } + + if (phba->sli_rev != LPFC_SLI_REV4) { + *retval = 0; + return true; + } + + /* proceed with the hotplug */ + return false; +} + +/** + * lpfc_irq_set_aff - set IRQ affinity + * @eqhdl: EQ handle + * @cpu: cpu to set affinity + * + **/ +static inline void +lpfc_irq_set_aff(struct lpfc_hba_eq_hdl *eqhdl, unsigned int cpu) +{ + cpumask_clear(&eqhdl->aff_mask); + cpumask_set_cpu(cpu, &eqhdl->aff_mask); + irq_set_status_flags(eqhdl->irq, IRQ_NO_BALANCING); + irq_set_affinity_hint(eqhdl->irq, &eqhdl->aff_mask); +} + +/** + * lpfc_irq_clear_aff - clear IRQ affinity + * @eqhdl: EQ handle + * + **/ +static inline void +lpfc_irq_clear_aff(struct lpfc_hba_eq_hdl *eqhdl) +{ + cpumask_clear(&eqhdl->aff_mask); + irq_clear_status_flags(eqhdl->irq, IRQ_NO_BALANCING); + irq_set_affinity_hint(eqhdl->irq, &eqhdl->aff_mask); +} + +/** + * lpfc_irq_rebalance - rebalances IRQ affinity according to cpuhp event + * @phba: pointer to HBA context object. + * @cpu: cpu going offline/online + * @offline: true, cpu is going offline. false, cpu is coming online. + * + * If cpu is going offline, we'll try our best effort to find the next + * online cpu on the phba's NUMA node and migrate all offlining IRQ affinities. + * + * If cpu is coming online, reaffinitize the IRQ back to the onlineng cpu. + * + * Note: Call only if cfg_irq_numa is enabled, otherwise rely on + * PCI_IRQ_AFFINITY to auto-manage IRQ affinity. + * + **/ +static void +lpfc_irq_rebalance(struct lpfc_hba *phba, unsigned int cpu, bool offline) +{ + struct lpfc_vector_map_info *cpup; + struct cpumask *aff_mask; + unsigned int cpu_select, cpu_next, idx; + const struct cpumask *numa_mask; + + if (!phba->cfg_irq_numa) + return; + + numa_mask = &phba->sli4_hba.numa_mask; + + if (!cpumask_test_cpu(cpu, numa_mask)) + return; + + cpup = &phba->sli4_hba.cpu_map[cpu]; + + if (!(cpup->flag & LPFC_CPU_FIRST_IRQ)) + return; + + if (offline) { + /* Find next online CPU on NUMA node */ + cpu_next = cpumask_next_wrap(cpu, numa_mask, cpu, true); + cpu_select = lpfc_next_online_numa_cpu(numa_mask, cpu_next); + + /* Found a valid CPU */ + if ((cpu_select < nr_cpu_ids) && (cpu_select != cpu)) { + /* Go through each eqhdl and ensure offlining + * cpu aff_mask is migrated + */ + for (idx = 0; idx < phba->cfg_irq_chann; idx++) { + aff_mask = lpfc_get_aff_mask(idx); + + /* Migrate affinity */ + if (cpumask_test_cpu(cpu, aff_mask)) + lpfc_irq_set_aff(lpfc_get_eq_hdl(idx), + cpu_select); + } + } else { + /* Rely on irqbalance if no online CPUs left on NUMA */ + for (idx = 0; idx < phba->cfg_irq_chann; idx++) + lpfc_irq_clear_aff(lpfc_get_eq_hdl(idx)); + } + } else { + /* Migrate affinity back to this CPU */ + lpfc_irq_set_aff(lpfc_get_eq_hdl(cpup->eq), cpu); + } +} + +static int lpfc_cpu_offline(unsigned int cpu, struct hlist_node *node) +{ + struct lpfc_hba *phba = hlist_entry_safe(node, struct lpfc_hba, cpuhp); + struct lpfc_queue *eq, *next; + LIST_HEAD(eqlist); + int retval; + + if (!phba) { + WARN_ONCE(!phba, "cpu: %u. phba:NULL", raw_smp_processor_id()); + return 0; + } + + if (__lpfc_cpuhp_checks(phba, &retval)) + return retval; + + lpfc_irq_rebalance(phba, cpu, true); + + lpfc_cpuhp_get_eq(phba, cpu, &eqlist); + + /* start polling on these eq's */ + list_for_each_entry_safe(eq, next, &eqlist, _poll_list) { + list_del_init(&eq->_poll_list); + lpfc_sli4_start_polling(eq); + } + + return 0; +} + +static int lpfc_cpu_online(unsigned int cpu, struct hlist_node *node) +{ + struct lpfc_hba *phba = hlist_entry_safe(node, struct lpfc_hba, cpuhp); + struct lpfc_queue *eq, *next; + unsigned int n; + int retval; + + if (!phba) { + WARN_ONCE(!phba, "cpu: %u. phba:NULL", raw_smp_processor_id()); + return 0; + } + + if (__lpfc_cpuhp_checks(phba, &retval)) + return retval; + + lpfc_irq_rebalance(phba, cpu, false); + + list_for_each_entry_safe(eq, next, &phba->poll_list, _poll_list) { + n = lpfc_find_cpu_handle(phba, eq->hdwq, LPFC_FIND_BY_HDWQ); + if (n == cpu) + lpfc_sli4_stop_polling(eq); + } + + return 0; +} + +/** * lpfc_sli4_enable_msix - Enable MSI-X interrupt mode to SLI-4 device * @phba: pointer to lpfc hba data structure. * * This routine is invoked to enable the MSI-X interrupt vectors to device - * with SLI-4 interface spec. + * with SLI-4 interface spec. It also allocates MSI-X vectors and maps them + * to cpus on the system. + * + * When cfg_irq_numa is enabled, the adapter will only allocate vectors for + * the number of cpus on the same numa node as this adapter. The vectors are + * allocated without requesting OS affinity mapping. A vector will be + * allocated and assigned to each online and offline cpu. If the cpu is + * online, then affinity will be set to that cpu. If the cpu is offline, then + * affinity will be set to the nearest peer cpu within the numa node that is + * online. If there are no online cpus within the numa node, affinity is not + * assigned and the OS may do as it pleases. Note: cpu vector affinity mapping + * is consistent with the way cpu online/offline is handled when cfg_irq_numa is + * configured. + * + * If numa mode is not enabled and there is more than 1 vector allocated, then + * the driver relies on the managed irq interface where the OS assigns vector to + * cpu affinity. The driver will then use that affinity mapping to setup its + * cpu mapping table. * * Return codes * 0 - successful @@ -10978,13 +11389,31 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba) { int vectors, rc, index; char *name; + const struct cpumask *numa_mask = NULL; + unsigned int cpu = 0, cpu_cnt = 0, cpu_select = nr_cpu_ids; + struct lpfc_hba_eq_hdl *eqhdl; + const struct cpumask *maskp; + bool first; + unsigned int flags = PCI_IRQ_MSIX; /* Set up MSI-X multi-message vectors */ vectors = phba->cfg_irq_chann; - rc = pci_alloc_irq_vectors(phba->pcidev, - 1, - vectors, PCI_IRQ_MSIX | PCI_IRQ_AFFINITY); + if (phba->cfg_irq_numa) { + numa_mask = &phba->sli4_hba.numa_mask; + cpu_cnt = cpumask_weight(numa_mask); + vectors = min(phba->cfg_irq_chann, cpu_cnt); + + /* cpu: iterates over numa_mask including offline or online + * cpu_select: iterates over online numa_mask to set affinity + */ + cpu = cpumask_first(numa_mask); + cpu_select = lpfc_next_online_numa_cpu(numa_mask, cpu); + } else { + flags |= PCI_IRQ_AFFINITY; + } + + rc = pci_alloc_irq_vectors(phba->pcidev, 1, vectors, flags); if (rc < 0) { lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "0484 PCI enable MSI-X failed (%d)\n", rc); @@ -10994,23 +11423,61 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba) /* Assign MSI-X vectors to interrupt handlers */ for (index = 0; index < vectors; index++) { - name = phba->sli4_hba.hba_eq_hdl[index].handler_name; + eqhdl = lpfc_get_eq_hdl(index); + name = eqhdl->handler_name; memset(name, 0, LPFC_SLI4_HANDLER_NAME_SZ); snprintf(name, LPFC_SLI4_HANDLER_NAME_SZ, LPFC_DRIVER_HANDLER_NAME"%d", index); - phba->sli4_hba.hba_eq_hdl[index].idx = index; - phba->sli4_hba.hba_eq_hdl[index].phba = phba; + eqhdl->idx = index; rc = request_irq(pci_irq_vector(phba->pcidev, index), &lpfc_sli4_hba_intr_handler, 0, - name, - &phba->sli4_hba.hba_eq_hdl[index]); + name, eqhdl); if (rc) { lpfc_printf_log(phba, KERN_WARNING, LOG_INIT, "0486 MSI-X fast-path (%d) " "request_irq failed (%d)\n", index, rc); goto cfg_fail_out; } + + eqhdl->irq = pci_irq_vector(phba->pcidev, index); + + if (phba->cfg_irq_numa) { + /* If found a neighboring online cpu, set affinity */ + if (cpu_select < nr_cpu_ids) + lpfc_irq_set_aff(eqhdl, cpu_select); + + /* Assign EQ to cpu_map */ + lpfc_assign_eq_map_info(phba, index, + LPFC_CPU_FIRST_IRQ, + cpu); + + /* Iterate to next offline or online cpu in numa_mask */ + cpu = cpumask_next(cpu, numa_mask); + + /* Find next online cpu in numa_mask to set affinity */ + cpu_select = lpfc_next_online_numa_cpu(numa_mask, cpu); + } else if (vectors == 1) { + cpu = cpumask_first(cpu_present_mask); + lpfc_assign_eq_map_info(phba, index, LPFC_CPU_FIRST_IRQ, + cpu); + } else { + maskp = pci_irq_get_affinity(phba->pcidev, index); + + first = true; + /* Loop through all CPUs associated with vector index */ + for_each_cpu_and(cpu, maskp, cpu_present_mask) { + /* If this is the first CPU thats assigned to + * this vector, set LPFC_CPU_FIRST_IRQ. + */ + lpfc_assign_eq_map_info(phba, index, + first ? + LPFC_CPU_FIRST_IRQ : 0, + cpu); + if (first) + first = false; + } + } } if (vectors != phba->cfg_irq_chann) { @@ -11020,17 +11487,18 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba) phba->cfg_irq_chann, vectors); if (phba->cfg_irq_chann > vectors) phba->cfg_irq_chann = vectors; - if (phba->nvmet_support && (phba->cfg_nvmet_mrq > vectors)) - phba->cfg_nvmet_mrq = vectors; } return rc; cfg_fail_out: /* free the irq already requested */ - for (--index; index >= 0; index--) - free_irq(pci_irq_vector(phba->pcidev, index), - &phba->sli4_hba.hba_eq_hdl[index]); + for (--index; index >= 0; index--) { + eqhdl = lpfc_get_eq_hdl(index); + lpfc_irq_clear_aff(eqhdl); + irq_set_affinity_hint(eqhdl->irq, NULL); + free_irq(eqhdl->irq, eqhdl); + } /* Unconfigure MSI-X capability structure */ pci_free_irq_vectors(phba->pcidev); @@ -11057,6 +11525,8 @@ static int lpfc_sli4_enable_msi(struct lpfc_hba *phba) { int rc, index; + unsigned int cpu; + struct lpfc_hba_eq_hdl *eqhdl; rc = pci_alloc_irq_vectors(phba->pcidev, 1, 1, PCI_IRQ_MSI | PCI_IRQ_AFFINITY); @@ -11078,9 +11548,15 @@ lpfc_sli4_enable_msi(struct lpfc_hba *phba) return rc; } + eqhdl = lpfc_get_eq_hdl(0); + eqhdl->irq = pci_irq_vector(phba->pcidev, 0); + + cpu = cpumask_first(cpu_present_mask); + lpfc_assign_eq_map_info(phba, 0, LPFC_CPU_FIRST_IRQ, cpu); + for (index = 0; index < phba->cfg_irq_chann; index++) { - phba->sli4_hba.hba_eq_hdl[index].idx = index; - phba->sli4_hba.hba_eq_hdl[index].phba = phba; + eqhdl = lpfc_get_eq_hdl(index); + eqhdl->idx = index; } return 0; @@ -11138,15 +11614,21 @@ lpfc_sli4_enable_intr(struct lpfc_hba *phba, uint32_t cfg_mode) IRQF_SHARED, LPFC_DRIVER_NAME, phba); if (!retval) { struct lpfc_hba_eq_hdl *eqhdl; + unsigned int cpu; /* Indicate initialization to INTx mode */ phba->intr_type = INTx; intr_mode = 0; + eqhdl = lpfc_get_eq_hdl(0); + eqhdl->irq = pci_irq_vector(phba->pcidev, 0); + + cpu = cpumask_first(cpu_present_mask); + lpfc_assign_eq_map_info(phba, 0, LPFC_CPU_FIRST_IRQ, + cpu); for (idx = 0; idx < phba->cfg_irq_chann; idx++) { - eqhdl = &phba->sli4_hba.hba_eq_hdl[idx]; + eqhdl = lpfc_get_eq_hdl(idx); eqhdl->idx = idx; - eqhdl->phba = phba; } } } @@ -11168,14 +11650,14 @@ lpfc_sli4_disable_intr(struct lpfc_hba *phba) /* Disable the currently initialized interrupt mode */ if (phba->intr_type == MSIX) { int index; + struct lpfc_hba_eq_hdl *eqhdl; /* Free up MSI-X multi-message vectors */ for (index = 0; index < phba->cfg_irq_chann; index++) { - irq_set_affinity_hint( - pci_irq_vector(phba->pcidev, index), - NULL); - free_irq(pci_irq_vector(phba->pcidev, index), - &phba->sli4_hba.hba_eq_hdl[index]); + eqhdl = lpfc_get_eq_hdl(index); + lpfc_irq_clear_aff(eqhdl); + irq_set_affinity_hint(eqhdl->irq, NULL); + free_irq(eqhdl->irq, eqhdl); } } else { free_irq(phba->pcidev->irq, phba); @@ -11367,6 +11849,9 @@ lpfc_sli4_hba_unset(struct lpfc_hba *phba) /* Wait for completion of device XRI exchange busy */ lpfc_sli4_xri_exchange_busy_wait(phba); + /* per-phba callback de-registration for hotplug event */ + lpfc_cpuhp_remove(phba); + /* Disable PCI subsystem interrupt */ lpfc_sli4_disable_intr(phba); @@ -11538,6 +12023,7 @@ lpfc_get_sli4_parameters(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq) sli4_params->cqav = bf_get(cfg_cqav, mbx_sli4_parameters); sli4_params->wqsize = bf_get(cfg_wqsize, mbx_sli4_parameters); sli4_params->bv1s = bf_get(cfg_bv1s, mbx_sli4_parameters); + sli4_params->pls = bf_get(cfg_pvl, mbx_sli4_parameters); sli4_params->sgl_pages_max = bf_get(cfg_sgl_page_cnt, mbx_sli4_parameters); sli4_params->wqpcnt = bf_get(cfg_wqpcnt, mbx_sli4_parameters); @@ -11589,13 +12075,10 @@ fcponly: } /* If the NVME FC4 type is enabled, scale the sg_seg_cnt to - * accommodate 512K and 1M IOs in a single nvme buf and supply - * enough NVME LS iocb buffers for larger connectivity counts. + * accommodate 512K and 1M IOs in a single nvme buf. */ - if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) phba->cfg_sg_seg_cnt = LPFC_MAX_NVME_SEG_CNT; - phba->cfg_iocb_cnt = 5; - } /* Only embed PBDE for if_type 6, PBDE support requires xib be set */ if ((bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) != @@ -12312,35 +12795,57 @@ lpfc_sli4_get_iocb_cnt(struct lpfc_hba *phba) } -static void +static int lpfc_log_write_firmware_error(struct lpfc_hba *phba, uint32_t offset, uint32_t magic_number, uint32_t ftype, uint32_t fid, uint32_t fsize, const struct firmware *fw) { - if ((offset == ADD_STATUS_FW_NOT_SUPPORTED) || + int rc; + + /* Three cases: (1) FW was not supported on the detected adapter. + * (2) FW update has been locked out administratively. + * (3) Some other error during FW update. + * In each case, an unmaskable message is written to the console + * for admin diagnosis. + */ + if (offset == ADD_STATUS_FW_NOT_SUPPORTED || (phba->pcidev->device == PCI_DEVICE_ID_LANCER_G6_FC && - magic_number != MAGIC_NUMER_G6) || + magic_number != MAGIC_NUMBER_G6) || (phba->pcidev->device == PCI_DEVICE_ID_LANCER_G7_FC && - magic_number != MAGIC_NUMER_G7)) + magic_number != MAGIC_NUMBER_G7)) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "3030 This firmware version is not supported on " - "this HBA model. Device:%x Magic:%x Type:%x " - "ID:%x Size %d %zd\n", - phba->pcidev->device, magic_number, ftype, fid, - fsize, fw->size); - else + "3030 This firmware version is not supported on" + " this HBA model. Device:%x Magic:%x Type:%x " + "ID:%x Size %d %zd\n", + phba->pcidev->device, magic_number, ftype, fid, + fsize, fw->size); + rc = -EINVAL; + } else if (offset == ADD_STATUS_FW_DOWNLOAD_HW_DISABLED) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "3022 FW Download failed. Device:%x Magic:%x Type:%x " - "ID:%x Size %d %zd\n", - phba->pcidev->device, magic_number, ftype, fid, - fsize, fw->size); + "3021 Firmware downloads have been prohibited " + "by a system configuration setting on " + "Device:%x Magic:%x Type:%x ID:%x Size %d " + "%zd\n", + phba->pcidev->device, magic_number, ftype, fid, + fsize, fw->size); + rc = -EACCES; + } else { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3022 FW Download failed. Add Status x%x " + "Device:%x Magic:%x Type:%x ID:%x Size %d " + "%zd\n", + offset, phba->pcidev->device, magic_number, + ftype, fid, fsize, fw->size); + rc = -EIO; + } + return rc; } - /** * lpfc_write_firmware - attempt to write a firmware image to the port * @fw: pointer to firmware image returned from request_firmware. - * @phba: pointer to lpfc hba data structure. + * @context: pointer to firmware image returned from request_firmware. + * @ret: return value this routine provides to the caller. * **/ static void @@ -12409,8 +12914,12 @@ lpfc_write_firmware(const struct firmware *fw, void *context) rc = lpfc_wr_object(phba, &dma_buffer_list, (fw->size - offset), &offset); if (rc) { - lpfc_log_write_firmware_error(phba, offset, - magic_number, ftype, fid, fsize, fw); + rc = lpfc_log_write_firmware_error(phba, offset, + magic_number, + ftype, + fid, + fsize, + fw); goto release_out; } } @@ -12430,9 +12939,12 @@ release_out: } release_firmware(fw); out: - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "3024 Firmware update done: %d.\n", rc); - return; + if (rc < 0) + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3062 Firmware update error, status %d.\n", rc); + else + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3024 Firmware update success: size %d.\n", rc); } /** @@ -12551,6 +13063,12 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) phba->pport = NULL; lpfc_stop_port(phba); + /* Init cpu_map array */ + lpfc_cpu_map_array_init(phba); + + /* Init hba_eq_hdl array */ + lpfc_hba_eq_hdl_array_init(phba); + /* Configure and enable interrupt */ intr_mode = lpfc_sli4_enable_intr(phba, cfg_mode); if (intr_mode == LPFC_INTR_ERROR) { @@ -12632,6 +13150,9 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) /* Enable RAS FW log support */ lpfc_sli4_ras_setup(phba); + INIT_LIST_HEAD(&phba->poll_list); + cpuhp_state_add_instance_nocalls(lpfc_cpuhp_state, &phba->cpuhp); + return 0; out_free_sysfs_attr: @@ -13344,8 +13865,7 @@ lpfc_sli4_oas_verify(struct lpfc_hba *phba) phba->cfg_fof = 1; } else { phba->cfg_fof = 0; - if (phba->device_data_mem_pool) - mempool_destroy(phba->device_data_mem_pool); + mempool_destroy(phba->device_data_mem_pool); phba->device_data_mem_pool = NULL; } @@ -13450,11 +13970,24 @@ lpfc_init(void) /* Initialize in case vector mapping is needed */ lpfc_present_cpu = num_present_cpus(); + error = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, + "lpfc/sli4:online", + lpfc_cpu_online, lpfc_cpu_offline); + if (error < 0) + goto cpuhp_failure; + lpfc_cpuhp_state = error; + error = pci_register_driver(&lpfc_driver); - if (error) { - fc_release_transport(lpfc_transport_template); - fc_release_transport(lpfc_vport_transport_template); - } + if (error) + goto unwind; + + return error; + +unwind: + cpuhp_remove_multi_state(lpfc_cpuhp_state); +cpuhp_failure: + fc_release_transport(lpfc_transport_template); + fc_release_transport(lpfc_vport_transport_template); return error; } @@ -13471,6 +14004,7 @@ lpfc_exit(void) { misc_deregister(&lpfc_mgmt_dev); pci_unregister_driver(&lpfc_driver); + cpuhp_remove_multi_state(lpfc_cpuhp_state); fc_release_transport(lpfc_transport_template); fc_release_transport(lpfc_vport_transport_template); idr_destroy(&lpfc_hba_index); diff --git a/drivers/scsi/lpfc/lpfc_logmsg.h b/drivers/scsi/lpfc/lpfc_logmsg.h index ea10f03437f5..148d02a27b58 100644 --- a/drivers/scsi/lpfc/lpfc_logmsg.h +++ b/drivers/scsi/lpfc/lpfc_logmsg.h @@ -46,6 +46,23 @@ #define LOG_NVME_IOERR 0x00800000 /* NVME IO Error events. */ #define LOG_ALL_MSG 0xffffffff /* LOG all messages */ +/* generate message by verbose log setting or severity */ +#define lpfc_vlog_msg(vport, level, mask, fmt, arg...) \ +{ if (((mask) & (vport)->cfg_log_verbose) || (level[1] <= '4')) \ + dev_printk(level, &((vport)->phba->pcidev)->dev, "%d:(%d):" \ + fmt, (vport)->phba->brd_no, vport->vpi, ##arg); } + +#define lpfc_log_msg(phba, level, mask, fmt, arg...) \ +do { \ + { uint32_t log_verbose = (phba)->pport ? \ + (phba)->pport->cfg_log_verbose : \ + (phba)->cfg_log_verbose; \ + if (((mask) & log_verbose) || (level[1] <= '4')) \ + dev_printk(level, &((phba)->pcidev)->dev, "%d:" \ + fmt, phba->brd_no, ##arg); \ + } \ +} while (0) + #define lpfc_printf_vlog(vport, level, mask, fmt, arg...) \ do { \ { if (((mask) & (vport)->cfg_log_verbose) || (level[1] <= '3')) \ diff --git a/drivers/scsi/lpfc/lpfc_mbox.c b/drivers/scsi/lpfc/lpfc_mbox.c index 8abe933bad09..d1773c01d2b3 100644 --- a/drivers/scsi/lpfc/lpfc_mbox.c +++ b/drivers/scsi/lpfc/lpfc_mbox.c @@ -515,6 +515,7 @@ lpfc_init_link(struct lpfc_hba * phba, if ((phba->pcidev->device == PCI_DEVICE_ID_LANCER_G6_FC || phba->pcidev->device == PCI_DEVICE_ID_LANCER_G7_FC) && + !(phba->sli4_hba.pc_sli4_params.pls) && mb->un.varInitLnk.link_flags & FLAGS_TOPOLOGY_MODE_LOOP) { mb->un.varInitLnk.link_flags = FLAGS_TOPOLOGY_MODE_PT_PT; phba->cfg_topology = FLAGS_TOPOLOGY_MODE_PT_PT; diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c index ae09bb863497..7082279e4c01 100644 --- a/drivers/scsi/lpfc/lpfc_mem.c +++ b/drivers/scsi/lpfc/lpfc_mem.c @@ -230,9 +230,6 @@ lpfc_mem_free(struct lpfc_hba *phba) dma_pool_destroy(phba->lpfc_hrb_pool); phba->lpfc_hrb_pool = NULL; - dma_pool_destroy(phba->txrdy_payload_pool); - phba->txrdy_payload_pool = NULL; - dma_pool_destroy(phba->lpfc_hbq_pool); phba->lpfc_hbq_pool = NULL; diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index fc6e4546d738..ae4359013846 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -279,6 +279,55 @@ lpfc_els_abort(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) lpfc_cancel_retry_delay_tmo(phba->pport, ndlp); } +/* lpfc_defer_pt2pt_acc - Complete SLI3 pt2pt processing on link up + * @phba: pointer to lpfc hba data structure. + * @link_mbox: pointer to CONFIG_LINK mailbox object + * + * This routine is only called if we are SLI3, direct connect pt2pt + * mode and the remote NPort issues the PLOGI after link up. + */ +static void +lpfc_defer_pt2pt_acc(struct lpfc_hba *phba, LPFC_MBOXQ_t *link_mbox) +{ + LPFC_MBOXQ_t *login_mbox; + MAILBOX_t *mb = &link_mbox->u.mb; + struct lpfc_iocbq *save_iocb; + struct lpfc_nodelist *ndlp; + int rc; + + ndlp = link_mbox->ctx_ndlp; + login_mbox = link_mbox->context3; + save_iocb = login_mbox->context3; + link_mbox->context3 = NULL; + login_mbox->context3 = NULL; + + /* Check for CONFIG_LINK error */ + if (mb->mbxStatus) { + lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, + "4575 CONFIG_LINK fails pt2pt discovery: %x\n", + mb->mbxStatus); + mempool_free(login_mbox, phba->mbox_mem_pool); + mempool_free(link_mbox, phba->mbox_mem_pool); + lpfc_sli_release_iocbq(phba, save_iocb); + return; + } + + /* Now that CONFIG_LINK completed, and our SID is configured, + * we can now proceed with sending the PLOGI ACC. + */ + rc = lpfc_els_rsp_acc(link_mbox->vport, ELS_CMD_PLOGI, + save_iocb, ndlp, login_mbox); + if (rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, + "4576 PLOGI ACC fails pt2pt discovery: %x\n", + rc); + mempool_free(login_mbox, phba->mbox_mem_pool); + } + + mempool_free(link_mbox, phba->mbox_mem_pool); + lpfc_sli_release_iocbq(phba, save_iocb); +} + static int lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, struct lpfc_iocbq *cmdiocb) @@ -291,10 +340,12 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, IOCB_t *icmd; struct serv_parm *sp; uint32_t ed_tov; - LPFC_MBOXQ_t *mbox; + LPFC_MBOXQ_t *link_mbox; + LPFC_MBOXQ_t *login_mbox; + struct lpfc_iocbq *save_iocb; struct ls_rjt stat; uint32_t vid, flag; - int rc; + int rc, defer_acc; memset(&stat, 0, sizeof (struct ls_rjt)); pcmd = (struct lpfc_dmabuf *) cmdiocb->context2; @@ -343,6 +394,7 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, else ndlp->nlp_fcp_info |= CLASS3; + defer_acc = 0; ndlp->nlp_class_sup = 0; if (sp->cls1.classValid) ndlp->nlp_class_sup |= FC_COS_CLASS1; @@ -354,7 +406,6 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ndlp->nlp_class_sup |= FC_COS_CLASS4; ndlp->nlp_maxframe = ((sp->cmn.bbRcvSizeMsb & 0x0F) << 8) | sp->cmn.bbRcvSizeLsb; - /* if already logged in, do implicit logout */ switch (ndlp->nlp_state) { case NLP_STE_NPR_NODE: @@ -396,6 +447,10 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ndlp->nlp_fcp_info &= ~NLP_FCP_2_DEVICE; ndlp->nlp_flag &= ~NLP_FIRSTBURST; + login_mbox = NULL; + link_mbox = NULL; + save_iocb = NULL; + /* Check for Nport to NPort pt2pt protocol */ if ((vport->fc_flag & FC_PT2PT) && !(vport->fc_flag & FC_PT2PT_PLOGI)) { @@ -423,17 +478,22 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, if (phba->sli_rev == LPFC_SLI_REV4) lpfc_issue_reg_vfi(vport); else { - mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); - if (mbox == NULL) + defer_acc = 1; + link_mbox = mempool_alloc(phba->mbox_mem_pool, + GFP_KERNEL); + if (!link_mbox) goto out; - lpfc_config_link(phba, mbox); - mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl; - mbox->vport = vport; - rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT); - if (rc == MBX_NOT_FINISHED) { - mempool_free(mbox, phba->mbox_mem_pool); + lpfc_config_link(phba, link_mbox); + link_mbox->mbox_cmpl = lpfc_defer_pt2pt_acc; + link_mbox->vport = vport; + link_mbox->ctx_ndlp = ndlp; + + save_iocb = lpfc_sli_get_iocbq(phba); + if (!save_iocb) goto out; - } + /* Save info from cmd IOCB used in rsp */ + memcpy((uint8_t *)save_iocb, (uint8_t *)cmdiocb, + sizeof(struct lpfc_iocbq)); } lpfc_can_disctmo(vport); @@ -448,8 +508,8 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ndlp->nlp_flag |= NLP_SUPPRESS_RSP; } - mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); - if (!mbox) + login_mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (!login_mbox) goto out; /* Registering an existing RPI behaves differently for SLI3 vs SLI4 */ @@ -457,21 +517,19 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, lpfc_unreg_rpi(vport, ndlp); rc = lpfc_reg_rpi(phba, vport->vpi, icmd->un.rcvels.remoteID, - (uint8_t *) sp, mbox, ndlp->nlp_rpi); - if (rc) { - mempool_free(mbox, phba->mbox_mem_pool); + (uint8_t *)sp, login_mbox, ndlp->nlp_rpi); + if (rc) goto out; - } /* ACC PLOGI rsp command needs to execute first, - * queue this mbox command to be processed later. + * queue this login_mbox command to be processed later. */ - mbox->mbox_cmpl = lpfc_mbx_cmpl_reg_login; + login_mbox->mbox_cmpl = lpfc_mbx_cmpl_reg_login; /* - * mbox->ctx_ndlp = lpfc_nlp_get(ndlp) deferred until mailbox + * login_mbox->ctx_ndlp = lpfc_nlp_get(ndlp) deferred until mailbox * command issued in lpfc_cmpl_els_acc(). */ - mbox->vport = vport; + login_mbox->vport = vport; spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= (NLP_ACC_REGLOGIN | NLP_RCV_PLOGI); spin_unlock_irq(shost->host_lock); @@ -484,8 +542,10 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, * single discovery thread, this will cause a huge delay in * discovery. Also this will cause multiple state machines * running in parallel for this node. + * This only applies to a fabric environment. */ - if (ndlp->nlp_state == NLP_STE_PLOGI_ISSUE) { + if ((ndlp->nlp_state == NLP_STE_PLOGI_ISSUE) && + (vport->fc_flag & FC_FABRIC)) { /* software abort outstanding PLOGI */ lpfc_els_abort(phba, ndlp); } @@ -504,16 +564,47 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, stat.un.b.lsRjtRsnCode = LSRJT_INVALID_CMD; stat.un.b.lsRjtRsnCodeExp = LSEXP_NOTHING_MORE; rc = lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, - ndlp, mbox); + ndlp, login_mbox); if (rc) - mempool_free(mbox, phba->mbox_mem_pool); + mempool_free(login_mbox, phba->mbox_mem_pool); + return 1; + } + if (defer_acc) { + /* So the order here should be: + * Issue CONFIG_LINK mbox + * CONFIG_LINK cmpl + * Issue PLOGI ACC + * PLOGI ACC cmpl + * Issue REG_LOGIN mbox + */ + + /* Save the REG_LOGIN mbox for and rcv IOCB copy later */ + link_mbox->context3 = login_mbox; + login_mbox->context3 = save_iocb; + + /* Start the ball rolling by issuing CONFIG_LINK here */ + rc = lpfc_sli_issue_mbox(phba, link_mbox, MBX_NOWAIT); + if (rc == MBX_NOT_FINISHED) + goto out; return 1; } - rc = lpfc_els_rsp_acc(vport, ELS_CMD_PLOGI, cmdiocb, ndlp, mbox); + + rc = lpfc_els_rsp_acc(vport, ELS_CMD_PLOGI, cmdiocb, ndlp, login_mbox); if (rc) - mempool_free(mbox, phba->mbox_mem_pool); + mempool_free(login_mbox, phba->mbox_mem_pool); return 1; out: + if (defer_acc) + lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, + "4577 pt2pt discovery failure: %p %p %p\n", + save_iocb, link_mbox, login_mbox); + if (save_iocb) + lpfc_sli_release_iocbq(phba, save_iocb); + if (link_mbox) + mempool_free(link_mbox, phba->mbox_mem_pool); + if (login_mbox) + mempool_free(login_mbox, phba->mbox_mem_pool); + stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC; stat.un.b.lsRjtRsnCodeExp = LSEXP_OUT_OF_RESOURCE; lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, NULL); @@ -2030,7 +2121,9 @@ lpfc_cmpl_prli_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, if (bf_get_be32(prli_init, nvpr)) ndlp->nlp_type |= NLP_NVME_INITIATOR; - if (phba->nsler && bf_get_be32(prli_nsler, nvpr)) + if (phba->nsler && bf_get_be32(prli_nsler, nvpr) && + bf_get_be32(prli_conf, nvpr)) + ndlp->nlp_nvme_info |= NLP_NVME_NSLER; else ndlp->nlp_nvme_info &= ~NLP_NVME_NSLER; diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index a227e36cbdc2..db4a04a207ec 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -196,6 +196,46 @@ lpfc_nvme_cmd_template(void) } /** + * lpfc_nvme_prep_abort_wqe - set up 'abort' work queue entry. + * @pwqeq: Pointer to command iocb. + * @xritag: Tag that uniqely identifies the local exchange resource. + * @opt: Option bits - + * bit 0 = inhibit sending abts on the link + * + * This function is called with hbalock held. + **/ +void +lpfc_nvme_prep_abort_wqe(struct lpfc_iocbq *pwqeq, u16 xritag, u8 opt) +{ + union lpfc_wqe128 *wqe = &pwqeq->wqe; + + /* WQEs are reused. Clear stale data and set key fields to + * zero like ia, iaab, iaar, xri_tag, and ctxt_tag. + */ + memset(wqe, 0, sizeof(*wqe)); + + if (opt & INHIBIT_ABORT) + bf_set(abort_cmd_ia, &wqe->abort_cmd, 1); + /* Abort specified xri tag, with the mask deliberately zeroed */ + bf_set(abort_cmd_criteria, &wqe->abort_cmd, T_XRI_TAG); + + bf_set(wqe_cmnd, &wqe->abort_cmd.wqe_com, CMD_ABORT_XRI_CX); + + /* Abort the IO associated with this outstanding exchange ID. */ + wqe->abort_cmd.wqe_com.abort_tag = xritag; + + /* iotag for the wqe completion. */ + bf_set(wqe_reqtag, &wqe->abort_cmd.wqe_com, pwqeq->iotag); + + bf_set(wqe_qosd, &wqe->abort_cmd.wqe_com, 1); + bf_set(wqe_lenloc, &wqe->abort_cmd.wqe_com, LPFC_WQE_LENLOC_NONE); + + bf_set(wqe_cmd_type, &wqe->abort_cmd.wqe_com, OTHER_COMMAND); + bf_set(wqe_wqec, &wqe->abort_cmd.wqe_com, 1); + bf_set(wqe_cqid, &wqe->abort_cmd.wqe_com, LPFC_WQE_CQ_ID_DEFAULT); +} + +/** * lpfc_nvme_create_queue - * @lpfc_pnvme: Pointer to the driver's nvme instance data * @qidx: An cpu index used to affinitize IO queues and MSIX vectors. @@ -1791,7 +1831,6 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, struct lpfc_iocbq *abts_buf; struct lpfc_iocbq *nvmereq_wqe; struct lpfc_nvme_fcpreq_priv *freqpriv; - union lpfc_wqe128 *abts_wqe; unsigned long flags; int ret_val; @@ -1912,37 +1951,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, /* Ready - mark outstanding as aborted by driver. */ nvmereq_wqe->iocb_flag |= LPFC_DRIVER_ABORTED; - /* Complete prepping the abort wqe and issue to the FW. */ - abts_wqe = &abts_buf->wqe; - - /* WQEs are reused. Clear stale data and set key fields to - * zero like ia, iaab, iaar, xri_tag, and ctxt_tag. - */ - memset(abts_wqe, 0, sizeof(*abts_wqe)); - bf_set(abort_cmd_criteria, &abts_wqe->abort_cmd, T_XRI_TAG); - - /* word 7 */ - bf_set(wqe_cmnd, &abts_wqe->abort_cmd.wqe_com, CMD_ABORT_XRI_CX); - bf_set(wqe_class, &abts_wqe->abort_cmd.wqe_com, - nvmereq_wqe->iocb.ulpClass); - - /* word 8 - tell the FW to abort the IO associated with this - * outstanding exchange ID. - */ - abts_wqe->abort_cmd.wqe_com.abort_tag = nvmereq_wqe->sli4_xritag; - - /* word 9 - this is the iotag for the abts_wqe completion. */ - bf_set(wqe_reqtag, &abts_wqe->abort_cmd.wqe_com, - abts_buf->iotag); - - /* word 10 */ - bf_set(wqe_qosd, &abts_wqe->abort_cmd.wqe_com, 1); - bf_set(wqe_lenloc, &abts_wqe->abort_cmd.wqe_com, LPFC_WQE_LENLOC_NONE); - - /* word 11 */ - bf_set(wqe_cmd_type, &abts_wqe->abort_cmd.wqe_com, OTHER_COMMAND); - bf_set(wqe_wqec, &abts_wqe->abort_cmd.wqe_com, 1); - bf_set(wqe_cqid, &abts_wqe->abort_cmd.wqe_com, LPFC_WQE_CQ_ID_DEFAULT); + lpfc_nvme_prep_abort_wqe(abts_buf, nvmereq_wqe->sli4_xritag, 0); /* ABTS WQE must go to the same WQ as the WQE to be aborted */ abts_buf->iocb_flag |= LPFC_IO_NVME; @@ -2084,7 +2093,7 @@ lpfc_release_nvme_buf(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_ncmd) lpfc_ncmd->flags &= ~LPFC_SBUF_BUMP_QDEPTH; qp = lpfc_ncmd->hdwq; - if (lpfc_ncmd->flags & LPFC_SBUF_XBUSY) { + if (unlikely(lpfc_ncmd->flags & LPFC_SBUF_XBUSY)) { lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, "6310 XB release deferred for " "ox_id x%x on reqtag x%x\n", @@ -2139,12 +2148,10 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport) */ lpfc_nvme_template.max_sgl_segments = phba->cfg_nvme_seg_cnt + 1; - /* Advertise how many hw queues we support based on fcp_io_sched */ - if (phba->cfg_fcp_io_sched == LPFC_FCP_SCHED_BY_HDWQ) - lpfc_nvme_template.max_hw_queues = phba->cfg_hdw_queue; - else - lpfc_nvme_template.max_hw_queues = - phba->sli4_hba.num_present_cpu; + /* Advertise how many hw queues we support based on cfg_hdw_queue, + * which will not exceed cpu count. + */ + lpfc_nvme_template.max_hw_queues = phba->cfg_hdw_queue; if (!IS_ENABLED(CONFIG_NVME_FC)) return ret; diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 9884228800a5..9dc9afe1c255 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -378,13 +378,6 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf) int cpu; unsigned long iflag; - if (ctxp->txrdy) { - dma_pool_free(phba->txrdy_payload_pool, ctxp->txrdy, - ctxp->txrdy_phys); - ctxp->txrdy = NULL; - ctxp->txrdy_phys = 0; - } - if (ctxp->state == LPFC_NVMET_STE_FREE) { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6411 NVMET free, already free IO x%x: %d %d\n", @@ -430,7 +423,6 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf) ctxp = (struct lpfc_nvmet_rcv_ctx *)ctx_buf->context; ctxp->wqeq = NULL; - ctxp->txrdy = NULL; ctxp->offset = 0; ctxp->phba = phba; ctxp->size = size; @@ -1958,12 +1950,10 @@ lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, uint32_t *payload; uint32_t size, oxid, sid, rc; - fc_hdr = (struct fc_frame_header *)(nvmebuf->hbuf.virt); - oxid = be16_to_cpu(fc_hdr->fh_ox_id); - if (!phba->targetport) { + if (!nvmebuf || !phba->targetport) { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, - "6154 LS Drop IO x%x\n", oxid); + "6154 LS Drop IO\n"); oxid = 0; size = 0; sid = 0; @@ -1971,6 +1961,9 @@ lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, goto dropit; } + fc_hdr = (struct fc_frame_header *)(nvmebuf->hbuf.virt); + oxid = be16_to_cpu(fc_hdr->fh_ox_id); + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; payload = (uint32_t *)(nvmebuf->dbuf.virt); size = bf_get(lpfc_rcqe_length, &nvmebuf->cq_event.cqe.rcqe_cmpl); @@ -2326,7 +2319,6 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, ctxp->state, ctxp->entry_cnt, ctxp->oxid); } ctxp->wqeq = NULL; - ctxp->txrdy = NULL; ctxp->offset = 0; ctxp->phba = phba; ctxp->size = size; @@ -2401,6 +2393,11 @@ lpfc_nvmet_unsol_ls_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, d_buf = piocb->context2; nvmebuf = container_of(d_buf, struct hbq_dmabuf, dbuf); + if (!nvmebuf) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "3015 LS Drop IO\n"); + return; + } if (phba->nvmet_support == 0) { lpfc_in_buf_free(phba, &nvmebuf->dbuf); return; @@ -2429,6 +2426,11 @@ lpfc_nvmet_unsol_fcp_event(struct lpfc_hba *phba, uint64_t isr_timestamp, uint8_t cqflag) { + if (!nvmebuf) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "3167 NVMET FCP Drop IO\n"); + return; + } if (phba->nvmet_support == 0) { lpfc_rq_buf_free(phba, &nvmebuf->hbuf); return; @@ -2595,7 +2597,6 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba, struct scatterlist *sgel; union lpfc_wqe128 *wqe; struct ulp_bde64 *bde; - uint32_t *txrdy; dma_addr_t physaddr; int i, cnt; int do_pbde; @@ -2757,23 +2758,11 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba, &lpfc_treceive_cmd_template.words[3], sizeof(uint32_t) * 9); - /* Words 0 - 2 : The first sg segment */ - txrdy = dma_pool_alloc(phba->txrdy_payload_pool, - GFP_KERNEL, &physaddr); - if (!txrdy) { - lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, - "6041 Bad txrdy buffer: oxid x%x\n", - ctxp->oxid); - return NULL; - } - ctxp->txrdy = txrdy; - ctxp->txrdy_phys = physaddr; - wqe->fcp_treceive.bde.tus.f.bdeFlags = BUFF_TYPE_BDE_64; - wqe->fcp_treceive.bde.tus.f.bdeSize = TXRDY_PAYLOAD_LEN; - wqe->fcp_treceive.bde.addrLow = - cpu_to_le32(putPaddrLow(physaddr)); - wqe->fcp_treceive.bde.addrHigh = - cpu_to_le32(putPaddrHigh(physaddr)); + /* Words 0 - 2 : First SGE is skipped, set invalid BDE type */ + wqe->fcp_treceive.bde.tus.f.bdeFlags = LPFC_SGE_TYPE_SKIP; + wqe->fcp_treceive.bde.tus.f.bdeSize = 0; + wqe->fcp_treceive.bde.addrLow = 0; + wqe->fcp_treceive.bde.addrHigh = 0; /* Word 4 */ wqe->fcp_treceive.relative_offset = ctxp->offset; @@ -2808,17 +2797,13 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba, /* Word 12 */ wqe->fcp_tsend.fcp_data_len = rsp->transfer_length; - /* Setup 1 TXRDY and 1 SKIP SGE */ - txrdy[0] = 0; - txrdy[1] = cpu_to_be32(rsp->transfer_length); - txrdy[2] = 0; - - sgl->addr_hi = putPaddrHigh(physaddr); - sgl->addr_lo = putPaddrLow(physaddr); + /* Setup 2 SKIP SGEs */ + sgl->addr_hi = 0; + sgl->addr_lo = 0; sgl->word2 = 0; - bf_set(lpfc_sli4_sge_type, sgl, LPFC_SGE_TYPE_DATA); + bf_set(lpfc_sli4_sge_type, sgl, LPFC_SGE_TYPE_SKIP); sgl->word2 = cpu_to_le32(sgl->word2); - sgl->sge_len = cpu_to_le32(TXRDY_PAYLOAD_LEN); + sgl->sge_len = 0; sgl++; sgl->addr_hi = 0; sgl->addr_lo = 0; @@ -3239,9 +3224,9 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, { struct lpfc_nvmet_tgtport *tgtp; struct lpfc_iocbq *abts_wqeq; - union lpfc_wqe128 *abts_wqe; struct lpfc_nodelist *ndlp; unsigned long flags; + u8 opt; int rc; tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; @@ -3280,8 +3265,8 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, return 0; } abts_wqeq = ctxp->abort_wqeq; - abts_wqe = &abts_wqeq->wqe; ctxp->state = LPFC_NVMET_STE_ABORT; + opt = (ctxp->flag & LPFC_NVMET_ABTS_RCV) ? INHIBIT_ABORT : 0; spin_unlock_irqrestore(&ctxp->ctxlock, flags); /* Announce entry to new IO submit field. */ @@ -3327,40 +3312,12 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, /* Ready - mark outstanding as aborted by driver. */ abts_wqeq->iocb_flag |= LPFC_DRIVER_ABORTED; - /* WQEs are reused. Clear stale data and set key fields to - * zero like ia, iaab, iaar, xri_tag, and ctxt_tag. - */ - memset(abts_wqe, 0, sizeof(*abts_wqe)); - - /* word 3 */ - bf_set(abort_cmd_criteria, &abts_wqe->abort_cmd, T_XRI_TAG); - - /* word 7 */ - bf_set(wqe_ct, &abts_wqe->abort_cmd.wqe_com, 0); - bf_set(wqe_cmnd, &abts_wqe->abort_cmd.wqe_com, CMD_ABORT_XRI_CX); - - /* word 8 - tell the FW to abort the IO associated with this - * outstanding exchange ID. - */ - abts_wqe->abort_cmd.wqe_com.abort_tag = ctxp->wqeq->sli4_xritag; - - /* word 9 - this is the iotag for the abts_wqe completion. */ - bf_set(wqe_reqtag, &abts_wqe->abort_cmd.wqe_com, - abts_wqeq->iotag); - - /* word 10 */ - bf_set(wqe_qosd, &abts_wqe->abort_cmd.wqe_com, 1); - bf_set(wqe_lenloc, &abts_wqe->abort_cmd.wqe_com, LPFC_WQE_LENLOC_NONE); - - /* word 11 */ - bf_set(wqe_cmd_type, &abts_wqe->abort_cmd.wqe_com, OTHER_COMMAND); - bf_set(wqe_wqec, &abts_wqe->abort_cmd.wqe_com, 1); - bf_set(wqe_cqid, &abts_wqe->abort_cmd.wqe_com, LPFC_WQE_CQ_ID_DEFAULT); + lpfc_nvme_prep_abort_wqe(abts_wqeq, ctxp->wqeq->sli4_xritag, opt); /* ABTS WQE must go to the same WQ as the WQE to be aborted */ abts_wqeq->hba_wqidx = ctxp->wqeq->hba_wqidx; abts_wqeq->wqe_cmpl = lpfc_nvmet_sol_fcp_abort_cmp; - abts_wqeq->iocb_cmpl = 0; + abts_wqeq->iocb_cmpl = NULL; abts_wqeq->iocb_flag |= LPFC_IO_NVME; abts_wqeq->context2 = ctxp; abts_wqeq->vport = phba->pport; @@ -3495,7 +3452,7 @@ lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *phba, spin_lock_irqsave(&phba->hbalock, flags); abts_wqeq->wqe_cmpl = lpfc_nvmet_xmt_ls_abort_cmp; - abts_wqeq->iocb_cmpl = 0; + abts_wqeq->iocb_cmpl = NULL; abts_wqeq->iocb_flag |= LPFC_IO_NVME_LS; rc = lpfc_sli4_issue_wqe(phba, ctxp->hdwq, abts_wqeq); spin_unlock_irqrestore(&phba->hbalock, flags); diff --git a/drivers/scsi/lpfc/lpfc_nvmet.h b/drivers/scsi/lpfc/lpfc_nvmet.h index 8ff67deac10a..b80b1639b9a7 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.h +++ b/drivers/scsi/lpfc/lpfc_nvmet.h @@ -112,9 +112,7 @@ struct lpfc_nvmet_rcv_ctx { struct lpfc_hba *phba; struct lpfc_iocbq *wqeq; struct lpfc_iocbq *abort_wqeq; - dma_addr_t txrdy_phys; spinlock_t ctxlock; /* protect flag access */ - uint32_t *txrdy; uint32_t sid; uint32_t offset; uint16_t oxid; diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 6822cd9ff8f1..b138d9fee675 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -134,21 +134,21 @@ lpfc_sli4_set_rsp_sgl_last(struct lpfc_hba *phba, /** * lpfc_update_stats - Update statistical data for the command completion - * @phba: Pointer to HBA object. + * @vport: The virtual port on which this call is executing. * @lpfc_cmd: lpfc scsi command object pointer. * * This function is called when there is a command completion and this * function updates the statistical data for the command completion. **/ static void -lpfc_update_stats(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd) +lpfc_update_stats(struct lpfc_vport *vport, struct lpfc_io_buf *lpfc_cmd) { + struct lpfc_hba *phba = vport->phba; struct lpfc_rport_data *rdata; struct lpfc_nodelist *pnode; struct scsi_cmnd *cmd = lpfc_cmd->pCmd; unsigned long flags; - struct Scsi_Host *shost = cmd->device->host; - struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); unsigned long latency; int i; @@ -526,7 +526,7 @@ lpfc_sli4_io_xri_aborted(struct lpfc_hba *phba, &qp->lpfc_abts_io_buf_list, list) { if (psb->cur_iocbq.sli4_xritag == xri) { list_del_init(&psb->list); - psb->exch_busy = 0; + psb->flags &= ~LPFC_SBUF_XBUSY; psb->status = IOSTAT_SUCCESS; if (psb->cur_iocbq.iocb_flag == LPFC_IO_NVME) { qp->abts_nvme_io_bufs--; @@ -566,7 +566,7 @@ lpfc_sli4_io_xri_aborted(struct lpfc_hba *phba, if (iocbq->sli4_xritag != xri) continue; psb = container_of(iocbq, struct lpfc_io_buf, cur_iocbq); - psb->exch_busy = 0; + psb->flags &= ~LPFC_SBUF_XBUSY; spin_unlock_irqrestore(&phba->hbalock, iflag); if (!list_empty(&pring->txq)) lpfc_worker_wake_up(phba); @@ -786,7 +786,7 @@ lpfc_release_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_io_buf *psb) psb->prot_seg_cnt = 0; qp = psb->hdwq; - if (psb->exch_busy) { + if (psb->flags & LPFC_SBUF_XBUSY) { spin_lock_irqsave(&qp->abts_io_buf_list_lock, iflag); psb->pCmd = NULL; list_add_tail(&psb->list, &qp->lpfc_abts_io_buf_list); @@ -3812,7 +3812,7 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn, /* Sanity check on return of outstanding command */ cmd = lpfc_cmd->pCmd; - if (!cmd) { + if (!cmd || !phba) { lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT, "2621 IO completion: Not an active IO\n"); spin_unlock(&lpfc_cmd->buf_lock); @@ -3824,7 +3824,7 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn, phba->sli4_hba.hdwq[idx].scsi_cstat.io_cmpls++; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS - if (phba->cpucheck_on & LPFC_CHECK_SCSI_IO) { + if (unlikely(phba->cpucheck_on & LPFC_CHECK_SCSI_IO)) { cpu = raw_smp_processor_id(); if (cpu < LPFC_CHECK_CPU_CNT && phba->sli4_hba.hdwq) phba->sli4_hba.hdwq[idx].cpucheck_cmpl_io[cpu]++; @@ -3835,7 +3835,10 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn, lpfc_cmd->result = (pIocbOut->iocb.un.ulpWord[4] & IOERR_PARAM_MASK); lpfc_cmd->status = pIocbOut->iocb.ulpStatus; /* pick up SLI4 exhange busy status from HBA */ - lpfc_cmd->exch_busy = pIocbOut->iocb_flag & LPFC_EXCHANGE_BUSY; + if (pIocbOut->iocb_flag & LPFC_EXCHANGE_BUSY) + lpfc_cmd->flags |= LPFC_SBUF_XBUSY; + else + lpfc_cmd->flags &= ~LPFC_SBUF_XBUSY; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS if (lpfc_cmd->prot_data_type) { @@ -3869,7 +3872,7 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn, } #endif - if (lpfc_cmd->status) { + if (unlikely(lpfc_cmd->status)) { if (lpfc_cmd->status == IOSTAT_LOCAL_REJECT && (lpfc_cmd->result & IOERR_DRVR_MASK)) lpfc_cmd->status = IOSTAT_DRIVER_REJECT; @@ -4002,7 +4005,7 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn, scsi_get_resid(cmd)); } - lpfc_update_stats(phba, lpfc_cmd); + lpfc_update_stats(vport, lpfc_cmd); if (vport->cfg_max_scsicmpl_time && time_after(jiffies, lpfc_cmd->start_time + msecs_to_jiffies(vport->cfg_max_scsicmpl_time))) { @@ -4610,17 +4613,18 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd) err = lpfc_scsi_prep_dma_buf(phba, lpfc_cmd); } - if (err == 2) { - cmnd->result = DID_ERROR << 16; - goto out_fail_command_release_buf; - } else if (err) { + if (unlikely(err)) { + if (err == 2) { + cmnd->result = DID_ERROR << 16; + goto out_fail_command_release_buf; + } goto out_host_busy_free_buf; } lpfc_scsi_prep_cmnd(vport, lpfc_cmd, ndlp); #ifdef CONFIG_SCSI_LPFC_DEBUG_FS - if (phba->cpucheck_on & LPFC_CHECK_SCSI_IO) { + if (unlikely(phba->cpucheck_on & LPFC_CHECK_SCSI_IO)) { cpu = raw_smp_processor_id(); if (cpu < LPFC_CHECK_CPU_CNT) { struct lpfc_sli4_hdw_queue *hdwq = @@ -4843,20 +4847,21 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd) ret_val = __lpfc_sli_issue_iocb(phba, LPFC_FCP_RING, abtsiocb, 0); } - /* no longer need the lock after this point */ - spin_unlock_irqrestore(&phba->hbalock, flags); if (ret_val == IOCB_ERROR) { /* Indicate the IO is not being aborted by the driver. */ iocb->iocb_flag &= ~LPFC_DRIVER_ABORTED; lpfc_cmd->waitq = NULL; spin_unlock(&lpfc_cmd->buf_lock); + spin_unlock_irqrestore(&phba->hbalock, flags); lpfc_sli_release_iocbq(phba, abtsiocb); ret = FAILED; goto out; } + /* no longer need the lock after this point */ spin_unlock(&lpfc_cmd->buf_lock); + spin_unlock_irqrestore(&phba->hbalock, flags); if (phba->cfg_poll & DISABLE_FCP_RING_INT) lpfc_sli_handle_fast_ring_event(phba, diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 614f78dddafe..c82b5792da98 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -87,6 +87,10 @@ static void lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe); static bool lpfc_sli4_mbox_completions_pending(struct lpfc_hba *phba); static bool lpfc_sli4_process_missed_mbox_completions(struct lpfc_hba *phba); +static struct lpfc_cqe *lpfc_sli4_cq_get(struct lpfc_queue *q); +static void __lpfc_sli4_consume_cqe(struct lpfc_hba *phba, + struct lpfc_queue *cq, + struct lpfc_cqe *cqe); static IOCB_t * lpfc_get_iocb_from_iocbq(struct lpfc_iocbq *iocbq) @@ -467,25 +471,52 @@ __lpfc_sli4_consume_eqe(struct lpfc_hba *phba, struct lpfc_queue *eq, } static void -lpfc_sli4_eq_flush(struct lpfc_hba *phba, struct lpfc_queue *eq) +lpfc_sli4_eqcq_flush(struct lpfc_hba *phba, struct lpfc_queue *eq) { - struct lpfc_eqe *eqe; - uint32_t count = 0; + struct lpfc_eqe *eqe = NULL; + u32 eq_count = 0, cq_count = 0; + struct lpfc_cqe *cqe = NULL; + struct lpfc_queue *cq = NULL, *childq = NULL; + int cqid = 0; /* walk all the EQ entries and drop on the floor */ eqe = lpfc_sli4_eq_get(eq); while (eqe) { + /* Get the reference to the corresponding CQ */ + cqid = bf_get_le32(lpfc_eqe_resource_id, eqe); + cq = NULL; + + list_for_each_entry(childq, &eq->child_list, list) { + if (childq->queue_id == cqid) { + cq = childq; + break; + } + } + /* If CQ is valid, iterate through it and drop all the CQEs */ + if (cq) { + cqe = lpfc_sli4_cq_get(cq); + while (cqe) { + __lpfc_sli4_consume_cqe(phba, cq, cqe); + cq_count++; + cqe = lpfc_sli4_cq_get(cq); + } + /* Clear and re-arm the CQ */ + phba->sli4_hba.sli4_write_cq_db(phba, cq, cq_count, + LPFC_QUEUE_REARM); + cq_count = 0; + } __lpfc_sli4_consume_eqe(phba, eq, eqe); - count++; + eq_count++; eqe = lpfc_sli4_eq_get(eq); } /* Clear and re-arm the EQ */ - phba->sli4_hba.sli4_write_eq_db(phba, eq, count, LPFC_QUEUE_REARM); + phba->sli4_hba.sli4_write_eq_db(phba, eq, eq_count, LPFC_QUEUE_REARM); } static int -lpfc_sli4_process_eq(struct lpfc_hba *phba, struct lpfc_queue *eq) +lpfc_sli4_process_eq(struct lpfc_hba *phba, struct lpfc_queue *eq, + uint8_t rearm) { struct lpfc_eqe *eqe; int count = 0, consumed = 0; @@ -519,8 +550,8 @@ lpfc_sli4_process_eq(struct lpfc_hba *phba, struct lpfc_queue *eq) eq->queue_claimed = 0; rearm_and_exit: - /* Always clear and re-arm the EQ */ - phba->sli4_hba.sli4_write_eq_db(phba, eq, consumed, LPFC_QUEUE_REARM); + /* Always clear the EQ. */ + phba->sli4_hba.sli4_write_eq_db(phba, eq, consumed, rearm); return count; } @@ -2526,6 +2557,8 @@ lpfc_sli_def_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) } else { __lpfc_sli_rpi_release(vport, ndlp); } + if (vport->load_flag & FC_UNLOADING) + lpfc_nlp_put(ndlp); pmb->ctx_ndlp = NULL; } } @@ -2672,7 +2705,8 @@ lpfc_sli_handle_mb_event(struct lpfc_hba *phba) lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI, "(%d):0323 Unknown Mailbox command " "x%x (x%x/x%x) Cmpl\n", - pmb->vport ? pmb->vport->vpi : 0, + pmb->vport ? pmb->vport->vpi : + LPFC_VPORT_UNKNOWN, pmbox->mbxCommand, lpfc_sli_config_mbox_subsys_get(phba, pmb), @@ -2693,7 +2727,8 @@ lpfc_sli_handle_mb_event(struct lpfc_hba *phba) "(%d):0305 Mbox cmd cmpl " "error - RETRYing Data: x%x " "(x%x/x%x) x%x x%x x%x\n", - pmb->vport ? pmb->vport->vpi : 0, + pmb->vport ? pmb->vport->vpi : + LPFC_VPORT_UNKNOWN, pmbox->mbxCommand, lpfc_sli_config_mbox_subsys_get(phba, pmb), @@ -2701,7 +2736,8 @@ lpfc_sli_handle_mb_event(struct lpfc_hba *phba) pmb), pmbox->mbxStatus, pmbox->un.varWords[0], - pmb->vport->port_state); + pmb->vport ? pmb->vport->port_state : + LPFC_VPORT_UNKNOWN); pmbox->mbxStatus = 0; pmbox->mbxOwner = OWN_HOST; rc = lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT); @@ -6167,6 +6203,14 @@ lpfc_set_features(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox, mbox->u.mqe.un.set_feature.feature = LPFC_SET_MDS_DIAGS; mbox->u.mqe.un.set_feature.param_len = 8; break; + case LPFC_SET_DUAL_DUMP: + bf_set(lpfc_mbx_set_feature_dd, + &mbox->u.mqe.un.set_feature, LPFC_ENABLE_DUAL_DUMP); + bf_set(lpfc_mbx_set_feature_ddquery, + &mbox->u.mqe.un.set_feature, 0); + mbox->u.mqe.un.set_feature.feature = LPFC_SET_DUAL_DUMP; + mbox->u.mqe.un.set_feature.param_len = 4; + break; } return; @@ -6184,11 +6228,16 @@ lpfc_ras_stop_fwlog(struct lpfc_hba *phba) { struct lpfc_ras_fwlog *ras_fwlog = &phba->ras_fwlog; - ras_fwlog->ras_active = false; + spin_lock_irq(&phba->hbalock); + ras_fwlog->state = INACTIVE; + spin_unlock_irq(&phba->hbalock); /* Disable FW logging to host memory */ writel(LPFC_CTL_PDEV_CTL_DDL_RAS, phba->sli4_hba.conf_regs_memmap_p + LPFC_CTL_PDEV_CTL_OFFSET); + + /* Wait 10ms for firmware to stop using DMA buffer */ + usleep_range(10 * 1000, 20 * 1000); } /** @@ -6224,7 +6273,9 @@ lpfc_sli4_ras_dma_free(struct lpfc_hba *phba) ras_fwlog->lwpd.virt = NULL; } - ras_fwlog->ras_active = false; + spin_lock_irq(&phba->hbalock); + ras_fwlog->state = INACTIVE; + spin_unlock_irq(&phba->hbalock); } /** @@ -6326,7 +6377,9 @@ lpfc_sli4_ras_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) goto disable_ras; } - ras_fwlog->ras_active = true; + spin_lock_irq(&phba->hbalock); + ras_fwlog->state = ACTIVE; + spin_unlock_irq(&phba->hbalock); mempool_free(pmb, phba->mbox_mem_pool); return; @@ -6358,6 +6411,10 @@ lpfc_sli4_ras_fwlog_init(struct lpfc_hba *phba, uint32_t len = 0, fwlog_buffsize, fwlog_entry_count; int rc = 0; + spin_lock_irq(&phba->hbalock); + ras_fwlog->state = INACTIVE; + spin_unlock_irq(&phba->hbalock); + fwlog_buffsize = (LPFC_RAS_MIN_BUFF_POST_SIZE * phba->cfg_ras_fwlog_buffsize); fwlog_entry_count = (fwlog_buffsize/LPFC_RAS_MAX_ENTRY_SIZE); @@ -6417,6 +6474,9 @@ lpfc_sli4_ras_fwlog_init(struct lpfc_hba *phba, mbx_fwlog->u.request.lwpd.addr_lo = putPaddrLow(ras_fwlog->lwpd.phys); mbx_fwlog->u.request.lwpd.addr_hi = putPaddrHigh(ras_fwlog->lwpd.phys); + spin_lock_irq(&phba->hbalock); + ras_fwlog->state = REG_INPROGRESS; + spin_unlock_irq(&phba->hbalock); mbox->vport = phba->pport; mbox->mbox_cmpl = lpfc_sli4_ras_mbox_cmpl; @@ -7148,7 +7208,7 @@ lpfc_post_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hrq, int lpfc_sli4_hba_setup(struct lpfc_hba *phba) { - int rc, i, cnt, len; + int rc, i, cnt, len, dd; LPFC_MBOXQ_t *mboxq; struct lpfc_mqe *mqe; uint8_t *vpd; @@ -7399,6 +7459,23 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) phba->sli3_options |= (LPFC_SLI3_NPIV_ENABLED | LPFC_SLI3_HBQ_ENABLED); spin_unlock_irq(&phba->hbalock); + /* Always try to enable dual dump feature if we can */ + lpfc_set_features(phba, mboxq, LPFC_SET_DUAL_DUMP); + rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL); + dd = bf_get(lpfc_mbx_set_feature_dd, &mboxq->u.mqe.un.set_feature); + if ((rc == MBX_SUCCESS) && (dd == LPFC_ENABLE_DUAL_DUMP)) + lpfc_printf_log(phba, KERN_ERR, LOG_SLI | LOG_INIT, + "6448 Dual Dump is enabled\n"); + else + lpfc_printf_log(phba, KERN_INFO, LOG_SLI | LOG_INIT, + "6447 Dual Dump Mailbox x%x (x%x/x%x) failed, " + "rc:x%x dd:x%x\n", + bf_get(lpfc_mqe_command, &mboxq->u.mqe), + lpfc_sli_config_mbox_subsys_get( + phba, mboxq), + lpfc_sli_config_mbox_opcode_get( + phba, mboxq), + rc, dd); /* * Allocate all resources (xri,rpi,vpi,vfi) now. Subsequent * calls depends on these resources to complete port setup. @@ -7523,9 +7600,11 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) } phba->sli4_hba.nvmet_xri_cnt = rc; - cnt = phba->cfg_iocb_cnt * 1024; - /* We need 1 iocbq for every SGL, for IO processing */ - cnt += phba->sli4_hba.nvmet_xri_cnt; + /* We allocate an iocbq for every receive context SGL. + * The additional allocation is for abort and ls handling. + */ + cnt = phba->sli4_hba.nvmet_xri_cnt + + phba->sli4_hba.max_cfg_param.max_xri; } else { /* update host common xri-sgl sizes and mappings */ rc = lpfc_sli4_io_sgl_update(phba); @@ -7547,14 +7626,17 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) rc = -ENODEV; goto out_destroy_queue; } - cnt = phba->cfg_iocb_cnt * 1024; + /* Each lpfc_io_buf job structure has an iocbq element. + * This cnt provides for abort, els, ct and ls requests. + */ + cnt = phba->sli4_hba.max_cfg_param.max_xri; } if (!phba->sli.iocbq_lookup) { /* Initialize and populate the iocb list per host */ lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "2821 initialize iocb list %d total %d\n", - phba->cfg_iocb_cnt, cnt); + "2821 initialize iocb list with %d entries\n", + cnt); rc = lpfc_init_iocb_list(phba, cnt); if (rc) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, @@ -7892,7 +7974,7 @@ lpfc_sli4_process_missed_mbox_completions(struct lpfc_hba *phba) if (mbox_pending) /* process and rearm the EQ */ - lpfc_sli4_process_eq(phba, fpeq); + lpfc_sli4_process_eq(phba, fpeq, LPFC_QUEUE_REARM); else /* Always clear and re-arm the EQ */ sli4_hba->sli4_write_eq_db(phba, fpeq, 0, LPFC_QUEUE_REARM); @@ -8964,7 +9046,8 @@ lpfc_mbox_api_table_setup(struct lpfc_hba *phba, uint8_t dev_grp) * @pring: Pointer to driver SLI ring object. * @piocb: Pointer to address of newly added command iocb. * - * This function is called with hbalock held to add a command + * This function is called with hbalock held for SLI3 ports or + * the ring lock held for SLI4 ports to add a command * iocb to the txq when SLI layer cannot submit the command iocb * to the ring. **/ @@ -8972,7 +9055,10 @@ void __lpfc_sli_ringtx_put(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, struct lpfc_iocbq *piocb) { - lockdep_assert_held(&phba->hbalock); + if (phba->sli_rev == LPFC_SLI_REV4) + lockdep_assert_held(&pring->ring_lock); + else + lockdep_assert_held(&phba->hbalock); /* Insert the caller's iocb in the txq tail for later processing. */ list_add_tail(&piocb->list, &pring->txq); } @@ -9863,7 +9949,7 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq, * __lpfc_sli_issue_iocb_s4 is used by other functions in the driver to issue * an iocb command to an HBA with SLI-4 interface spec. * - * This function is called with hbalock held. The function will return success + * This function is called with ringlock held. The function will return success * after it successfully submit the iocb to firmware or after adding to the * txq. **/ @@ -10053,10 +10139,13 @@ lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number, struct lpfc_iocbq *piocb, uint32_t flag) { struct lpfc_sli_ring *pring; + struct lpfc_queue *eq; unsigned long iflags; int rc; if (phba->sli_rev == LPFC_SLI_REV4) { + eq = phba->sli4_hba.hdwq[piocb->hba_wqidx].hba_eq; + pring = lpfc_sli4_calc_ring(phba, piocb); if (unlikely(pring == NULL)) return IOCB_ERROR; @@ -10064,6 +10153,8 @@ lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number, spin_lock_irqsave(&pring->ring_lock, iflags); rc = __lpfc_sli_issue_iocb(phba, ring_number, piocb, flag); spin_unlock_irqrestore(&pring->ring_lock, iflags); + + lpfc_sli4_poll_eq(eq, LPFC_POLL_FASTPATH); } else { /* For now, SLI2/3 will still use hbalock */ spin_lock_irqsave(&phba->hbalock, iflags); @@ -10678,14 +10769,14 @@ lpfc_sli_host_down(struct lpfc_vport *vport) set_bit(LPFC_DATA_READY, &phba->data_flags); } prev_pring_flag = pring->flag; - spin_lock_irq(&pring->ring_lock); + spin_lock(&pring->ring_lock); list_for_each_entry_safe(iocb, next_iocb, &pring->txq, list) { if (iocb->vport != vport) continue; list_move_tail(&iocb->list, &completions); } - spin_unlock_irq(&pring->ring_lock); + spin_unlock(&pring->ring_lock); list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list) { if (iocb->vport != vport) @@ -11050,9 +11141,6 @@ lpfc_sli_abort_els_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, irsp->ulpStatus, irsp->un.ulpWord[4]); spin_unlock_irq(&phba->hbalock); - if (irsp->ulpStatus == IOSTAT_LOCAL_REJECT && - irsp->un.ulpWord[4] == IOERR_SLI_ABORTED) - lpfc_sli_release_iocbq(phba, abort_iocb); } release_iocb: lpfc_sli_release_iocbq(phba, cmdiocb); @@ -11736,7 +11824,10 @@ lpfc_sli_wake_iocb_wait(struct lpfc_hba *phba, !(cmdiocbq->iocb_flag & LPFC_IO_LIBDFC)) { lpfc_cmd = container_of(cmdiocbq, struct lpfc_io_buf, cur_iocbq); - lpfc_cmd->exch_busy = rspiocbq->iocb_flag & LPFC_EXCHANGE_BUSY; + if (rspiocbq && (rspiocbq->iocb_flag & LPFC_EXCHANGE_BUSY)) + lpfc_cmd->flags |= LPFC_SBUF_XBUSY; + else + lpfc_cmd->flags &= ~LPFC_SBUF_XBUSY; } pdone_q = cmdiocbq->context_un.wait_queue; @@ -13158,13 +13249,19 @@ send_current_mbox: phba->sli.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE; /* Setting active mailbox pointer need to be in sync to flag clear */ phba->sli.mbox_active = NULL; + if (bf_get(lpfc_trailer_consumed, mcqe)) + lpfc_sli4_mq_release(phba->sli4_hba.mbx_wq); spin_unlock_irqrestore(&phba->hbalock, iflags); /* Wake up worker thread to post the next pending mailbox command */ lpfc_worker_wake_up(phba); + return workposted; + out_no_mqe_complete: + spin_lock_irqsave(&phba->hbalock, iflags); if (bf_get(lpfc_trailer_consumed, mcqe)) lpfc_sli4_mq_release(phba->sli4_hba.mbx_wq); - return workposted; + spin_unlock_irqrestore(&phba->hbalock, iflags); + return false; } /** @@ -13217,7 +13314,6 @@ lpfc_sli4_sp_handle_els_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, struct lpfc_sli_ring *pring = cq->pring; int txq_cnt = 0; int txcmplq_cnt = 0; - int fcp_txcmplq_cnt = 0; /* Check for response status */ if (unlikely(bf_get(lpfc_wcqe_c_status, wcqe))) { @@ -13239,9 +13335,8 @@ lpfc_sli4_sp_handle_els_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, txcmplq_cnt++; lpfc_printf_log(phba, KERN_ERR, LOG_SLI, "0387 NO IOCBQ data: txq_cnt=%d iocb_cnt=%d " - "fcp_txcmplq_cnt=%d, els_txcmplq_cnt=%d\n", + "els_txcmplq_cnt=%d\n", txq_cnt, phba->iocb_cnt, - fcp_txcmplq_cnt, txcmplq_cnt); return false; } @@ -13592,6 +13687,7 @@ __lpfc_sli4_process_cq(struct lpfc_hba *phba, struct lpfc_queue *cq, phba->sli4_hba.sli4_write_cq_db(phba, cq, consumed, LPFC_QUEUE_NOARM); consumed = 0; + cq->assoc_qp->q_flag |= HBA_EQ_DELAY_CHK; } if (count == LPFC_NVMET_CQ_NOTIFY) @@ -14220,7 +14316,7 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id) spin_lock_irqsave(&phba->hbalock, iflag); if (phba->link_state < LPFC_LINK_DOWN) /* Flush, clear interrupt, and rearm the EQ */ - lpfc_sli4_eq_flush(phba, fpeq); + lpfc_sli4_eqcq_flush(phba, fpeq); spin_unlock_irqrestore(&phba->hbalock, iflag); return IRQ_NONE; } @@ -14230,14 +14326,14 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id) fpeq->last_cpu = raw_smp_processor_id(); if (icnt > LPFC_EQD_ISR_TRIGGER && - phba->cfg_irq_chann == 1 && + fpeq->q_flag & HBA_EQ_DELAY_CHK && phba->cfg_auto_imax && fpeq->q_mode != LPFC_MAX_AUTO_EQ_DELAY && phba->sli.sli_flag & LPFC_SLI_USE_EQDR) lpfc_sli4_mod_hba_eq_delay(phba, fpeq, LPFC_MAX_AUTO_EQ_DELAY); /* process and rearm the EQ */ - ecount = lpfc_sli4_process_eq(phba, fpeq); + ecount = lpfc_sli4_process_eq(phba, fpeq, LPFC_QUEUE_REARM); if (unlikely(ecount == 0)) { fpeq->EQ_no_entry++; @@ -14297,6 +14393,147 @@ lpfc_sli4_intr_handler(int irq, void *dev_id) return (hba_handled == true) ? IRQ_HANDLED : IRQ_NONE; } /* lpfc_sli4_intr_handler */ +void lpfc_sli4_poll_hbtimer(struct timer_list *t) +{ + struct lpfc_hba *phba = from_timer(phba, t, cpuhp_poll_timer); + struct lpfc_queue *eq; + int i = 0; + + rcu_read_lock(); + + list_for_each_entry_rcu(eq, &phba->poll_list, _poll_list) + i += lpfc_sli4_poll_eq(eq, LPFC_POLL_SLOWPATH); + if (!list_empty(&phba->poll_list)) + mod_timer(&phba->cpuhp_poll_timer, + jiffies + msecs_to_jiffies(LPFC_POLL_HB)); + + rcu_read_unlock(); +} + +inline int lpfc_sli4_poll_eq(struct lpfc_queue *eq, uint8_t path) +{ + struct lpfc_hba *phba = eq->phba; + int i = 0; + + /* + * Unlocking an irq is one of the entry point to check + * for re-schedule, but we are good for io submission + * path as midlayer does a get_cpu to glue us in. Flush + * out the invalidate queue so we can see the updated + * value for flag. + */ + smp_rmb(); + + if (READ_ONCE(eq->mode) == LPFC_EQ_POLL) + /* We will not likely get the completion for the caller + * during this iteration but i guess that's fine. + * Future io's coming on this eq should be able to + * pick it up. As for the case of single io's, they + * will be handled through a sched from polling timer + * function which is currently triggered every 1msec. + */ + i = lpfc_sli4_process_eq(phba, eq, LPFC_QUEUE_NOARM); + + return i; +} + +static inline void lpfc_sli4_add_to_poll_list(struct lpfc_queue *eq) +{ + struct lpfc_hba *phba = eq->phba; + + if (list_empty(&phba->poll_list)) { + timer_setup(&phba->cpuhp_poll_timer, lpfc_sli4_poll_hbtimer, 0); + /* kickstart slowpath processing for this eq */ + mod_timer(&phba->cpuhp_poll_timer, + jiffies + msecs_to_jiffies(LPFC_POLL_HB)); + } + + list_add_rcu(&eq->_poll_list, &phba->poll_list); + synchronize_rcu(); +} + +static inline void lpfc_sli4_remove_from_poll_list(struct lpfc_queue *eq) +{ + struct lpfc_hba *phba = eq->phba; + + /* Disable slowpath processing for this eq. Kick start the eq + * by RE-ARMING the eq's ASAP + */ + list_del_rcu(&eq->_poll_list); + synchronize_rcu(); + + if (list_empty(&phba->poll_list)) + del_timer_sync(&phba->cpuhp_poll_timer); +} + +void lpfc_sli4_cleanup_poll_list(struct lpfc_hba *phba) +{ + struct lpfc_queue *eq, *next; + + list_for_each_entry_safe(eq, next, &phba->poll_list, _poll_list) + list_del(&eq->_poll_list); + + INIT_LIST_HEAD(&phba->poll_list); + synchronize_rcu(); +} + +static inline void +__lpfc_sli4_switch_eqmode(struct lpfc_queue *eq, uint8_t mode) +{ + if (mode == eq->mode) + return; + /* + * currently this function is only called during a hotplug + * event and the cpu on which this function is executing + * is going offline. By now the hotplug has instructed + * the scheduler to remove this cpu from cpu active mask. + * So we don't need to work about being put aside by the + * scheduler for a high priority process. Yes, the inte- + * rrupts could come but they are known to retire ASAP. + */ + + /* Disable polling in the fastpath */ + WRITE_ONCE(eq->mode, mode); + /* flush out the store buffer */ + smp_wmb(); + + /* + * Add this eq to the polling list and start polling. For + * a grace period both interrupt handler and poller will + * try to process the eq _but_ that's fine. We have a + * synchronization mechanism in place (queue_claimed) to + * deal with it. This is just a draining phase for int- + * errupt handler (not eq's) as we have guranteed through + * barrier that all the CPUs have seen the new CQ_POLLED + * state. which will effectively disable the REARMING of + * the EQ. The whole idea is eq's die off eventually as + * we are not rearming EQ's anymore. + */ + mode ? lpfc_sli4_add_to_poll_list(eq) : + lpfc_sli4_remove_from_poll_list(eq); +} + +void lpfc_sli4_start_polling(struct lpfc_queue *eq) +{ + __lpfc_sli4_switch_eqmode(eq, LPFC_EQ_POLL); +} + +void lpfc_sli4_stop_polling(struct lpfc_queue *eq) +{ + struct lpfc_hba *phba = eq->phba; + + __lpfc_sli4_switch_eqmode(eq, LPFC_EQ_INTERRUPT); + + /* Kick start for the pending io's in h/w. + * Once we switch back to interrupt processing on a eq + * the io path completion will only arm eq's when it + * receives a completion. But since eq's are in disa- + * rmed state it doesn't receive a completion. This + * creates a deadlock scenaro. + */ + phba->sli4_hba.sli4_write_eq_db(phba, eq, 0, LPFC_QUEUE_REARM); +} + /** * lpfc_sli4_queue_free - free a queue structure and associated memory * @queue: The queue structure to free. @@ -14371,6 +14608,7 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t page_size, return NULL; INIT_LIST_HEAD(&queue->list); + INIT_LIST_HEAD(&queue->_poll_list); INIT_LIST_HEAD(&queue->wq_list); INIT_LIST_HEAD(&queue->wqfull_list); INIT_LIST_HEAD(&queue->page_list); @@ -18124,8 +18362,9 @@ lpfc_sli4_alloc_rpi(struct lpfc_hba *phba) phba->sli4_hba.max_cfg_param.rpi_used++; phba->sli4_hba.rpi_count++; } - lpfc_printf_log(phba, KERN_INFO, LOG_SLI, - "0001 rpi:%x max:%x lim:%x\n", + lpfc_printf_log(phba, KERN_INFO, + LOG_NODE | LOG_DISCOVERY, + "0001 Allocated rpi:x%x max:x%x lim:x%x\n", (int) rpi, max_rpi, rpi_limit); /* @@ -18181,11 +18420,19 @@ lpfc_sli4_alloc_rpi(struct lpfc_hba *phba) static void __lpfc_sli4_free_rpi(struct lpfc_hba *phba, int rpi) { + /* + * if the rpi value indicates a prior unreg has already + * been done, skip the unreg. + */ + if (rpi == LPFC_RPI_ALLOC_ERROR) + return; + if (test_and_clear_bit(rpi, phba->sli4_hba.rpi_bmask)) { phba->sli4_hba.rpi_count--; phba->sli4_hba.max_cfg_param.rpi_used--; } else { - lpfc_printf_log(phba, KERN_INFO, LOG_SLI, + lpfc_printf_log(phba, KERN_INFO, + LOG_NODE | LOG_DISCOVERY, "2016 rpi %x not inuse\n", rpi); } @@ -19683,6 +19930,8 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, struct lpfc_sli4_hdw_queue *qp, lpfc_sli_ringtxcmpl_put(phba, pring, pwqe); spin_unlock_irqrestore(&pring->ring_lock, iflags); + + lpfc_sli4_poll_eq(qp->hba_eq, LPFC_POLL_FASTPATH); return 0; } @@ -19703,6 +19952,8 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, struct lpfc_sli4_hdw_queue *qp, } lpfc_sli_ringtxcmpl_put(phba, pring, pwqe); spin_unlock_irqrestore(&pring->ring_lock, iflags); + + lpfc_sli4_poll_eq(qp->hba_eq, LPFC_POLL_FASTPATH); return 0; } @@ -19731,6 +19982,8 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, struct lpfc_sli4_hdw_queue *qp, } lpfc_sli_ringtxcmpl_put(phba, pring, pwqe); spin_unlock_irqrestore(&pring->ring_lock, iflags); + + lpfc_sli4_poll_eq(qp->hba_eq, LPFC_POLL_FASTPATH); return 0; } return WQE_ERROR; @@ -20093,6 +20346,13 @@ void lpfc_release_io_buf(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_ncmd, lpfc_ncmd->cur_iocbq.wqe_cmpl = NULL; lpfc_ncmd->cur_iocbq.iocb_cmpl = NULL; + if (phba->cfg_xpsgl && !phba->nvmet_support && + !list_empty(&lpfc_ncmd->dma_sgl_xtra_list)) + lpfc_put_sgl_per_hdwq(phba, lpfc_ncmd); + + if (!list_empty(&lpfc_ncmd->dma_cmd_rsp_list)) + lpfc_put_cmd_rsp_buf_per_hdwq(phba, lpfc_ncmd); + if (phba->cfg_xri_rebalancing) { if (lpfc_ncmd->expedite) { /* Return to expedite pool */ @@ -20157,13 +20417,6 @@ void lpfc_release_io_buf(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_ncmd, spin_unlock_irqrestore(&qp->io_buf_list_put_lock, iflag); } - - if (phba->cfg_xpsgl && !phba->nvmet_support && - !list_empty(&lpfc_ncmd->dma_sgl_xtra_list)) - lpfc_put_sgl_per_hdwq(phba, lpfc_ncmd); - - if (!list_empty(&lpfc_ncmd->dma_cmd_rsp_list)) - lpfc_put_cmd_rsp_buf_per_hdwq(phba, lpfc_ncmd); } /** @@ -20399,8 +20652,9 @@ lpfc_get_sgl_per_hdwq(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_buf) struct sli4_hybrid_sgl *allocated_sgl = NULL; struct lpfc_sli4_hdw_queue *hdwq = lpfc_buf->hdwq; struct list_head *buf_list = &hdwq->sgl_list; + unsigned long iflags; - spin_lock_irq(&hdwq->hdwq_lock); + spin_lock_irqsave(&hdwq->hdwq_lock, iflags); if (likely(!list_empty(buf_list))) { /* break off 1 chunk from the sgl_list */ @@ -20412,9 +20666,9 @@ lpfc_get_sgl_per_hdwq(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_buf) } } else { /* allocate more */ - spin_unlock_irq(&hdwq->hdwq_lock); + spin_unlock_irqrestore(&hdwq->hdwq_lock, iflags); tmp = kmalloc_node(sizeof(*tmp), GFP_ATOMIC, - cpu_to_node(smp_processor_id())); + cpu_to_node(hdwq->io_wq->chann)); if (!tmp) { lpfc_printf_log(phba, KERN_INFO, LOG_SLI, "8353 error kmalloc memory for HDWQ " @@ -20434,7 +20688,7 @@ lpfc_get_sgl_per_hdwq(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_buf) return NULL; } - spin_lock_irq(&hdwq->hdwq_lock); + spin_lock_irqsave(&hdwq->hdwq_lock, iflags); list_add_tail(&tmp->list_node, &lpfc_buf->dma_sgl_xtra_list); } @@ -20442,7 +20696,7 @@ lpfc_get_sgl_per_hdwq(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_buf) struct sli4_hybrid_sgl, list_node); - spin_unlock_irq(&hdwq->hdwq_lock); + spin_unlock_irqrestore(&hdwq->hdwq_lock, iflags); return allocated_sgl; } @@ -20466,8 +20720,9 @@ lpfc_put_sgl_per_hdwq(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_buf) struct sli4_hybrid_sgl *tmp = NULL; struct lpfc_sli4_hdw_queue *hdwq = lpfc_buf->hdwq; struct list_head *buf_list = &hdwq->sgl_list; + unsigned long iflags; - spin_lock_irq(&hdwq->hdwq_lock); + spin_lock_irqsave(&hdwq->hdwq_lock, iflags); if (likely(!list_empty(&lpfc_buf->dma_sgl_xtra_list))) { list_for_each_entry_safe(list_entry, tmp, @@ -20480,7 +20735,7 @@ lpfc_put_sgl_per_hdwq(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_buf) rc = -EINVAL; } - spin_unlock_irq(&hdwq->hdwq_lock); + spin_unlock_irqrestore(&hdwq->hdwq_lock, iflags); return rc; } @@ -20501,8 +20756,9 @@ lpfc_free_sgl_per_hdwq(struct lpfc_hba *phba, struct list_head *buf_list = &hdwq->sgl_list; struct sli4_hybrid_sgl *list_entry = NULL; struct sli4_hybrid_sgl *tmp = NULL; + unsigned long iflags; - spin_lock_irq(&hdwq->hdwq_lock); + spin_lock_irqsave(&hdwq->hdwq_lock, iflags); /* Free sgl pool */ list_for_each_entry_safe(list_entry, tmp, @@ -20514,7 +20770,7 @@ lpfc_free_sgl_per_hdwq(struct lpfc_hba *phba, kfree(list_entry); } - spin_unlock_irq(&hdwq->hdwq_lock); + spin_unlock_irqrestore(&hdwq->hdwq_lock, iflags); } /** @@ -20538,8 +20794,9 @@ lpfc_get_cmd_rsp_buf_per_hdwq(struct lpfc_hba *phba, struct fcp_cmd_rsp_buf *allocated_buf = NULL; struct lpfc_sli4_hdw_queue *hdwq = lpfc_buf->hdwq; struct list_head *buf_list = &hdwq->cmd_rsp_buf_list; + unsigned long iflags; - spin_lock_irq(&hdwq->hdwq_lock); + spin_lock_irqsave(&hdwq->hdwq_lock, iflags); if (likely(!list_empty(buf_list))) { /* break off 1 chunk from the list */ @@ -20552,9 +20809,9 @@ lpfc_get_cmd_rsp_buf_per_hdwq(struct lpfc_hba *phba, } } else { /* allocate more */ - spin_unlock_irq(&hdwq->hdwq_lock); + spin_unlock_irqrestore(&hdwq->hdwq_lock, iflags); tmp = kmalloc_node(sizeof(*tmp), GFP_ATOMIC, - cpu_to_node(smp_processor_id())); + cpu_to_node(hdwq->io_wq->chann)); if (!tmp) { lpfc_printf_log(phba, KERN_INFO, LOG_SLI, "8355 error kmalloc memory for HDWQ " @@ -20579,7 +20836,7 @@ lpfc_get_cmd_rsp_buf_per_hdwq(struct lpfc_hba *phba, tmp->fcp_rsp = (struct fcp_rsp *)((uint8_t *)tmp->fcp_cmnd + sizeof(struct fcp_cmnd)); - spin_lock_irq(&hdwq->hdwq_lock); + spin_lock_irqsave(&hdwq->hdwq_lock, iflags); list_add_tail(&tmp->list_node, &lpfc_buf->dma_cmd_rsp_list); } @@ -20587,7 +20844,7 @@ lpfc_get_cmd_rsp_buf_per_hdwq(struct lpfc_hba *phba, struct fcp_cmd_rsp_buf, list_node); - spin_unlock_irq(&hdwq->hdwq_lock); + spin_unlock_irqrestore(&hdwq->hdwq_lock, iflags); return allocated_buf; } @@ -20612,8 +20869,9 @@ lpfc_put_cmd_rsp_buf_per_hdwq(struct lpfc_hba *phba, struct fcp_cmd_rsp_buf *tmp = NULL; struct lpfc_sli4_hdw_queue *hdwq = lpfc_buf->hdwq; struct list_head *buf_list = &hdwq->cmd_rsp_buf_list; + unsigned long iflags; - spin_lock_irq(&hdwq->hdwq_lock); + spin_lock_irqsave(&hdwq->hdwq_lock, iflags); if (likely(!list_empty(&lpfc_buf->dma_cmd_rsp_list))) { list_for_each_entry_safe(list_entry, tmp, @@ -20626,7 +20884,7 @@ lpfc_put_cmd_rsp_buf_per_hdwq(struct lpfc_hba *phba, rc = -EINVAL; } - spin_unlock_irq(&hdwq->hdwq_lock); + spin_unlock_irqrestore(&hdwq->hdwq_lock, iflags); return rc; } @@ -20647,8 +20905,9 @@ lpfc_free_cmd_rsp_buf_per_hdwq(struct lpfc_hba *phba, struct list_head *buf_list = &hdwq->cmd_rsp_buf_list; struct fcp_cmd_rsp_buf *list_entry = NULL; struct fcp_cmd_rsp_buf *tmp = NULL; + unsigned long iflags; - spin_lock_irq(&hdwq->hdwq_lock); + spin_lock_irqsave(&hdwq->hdwq_lock, iflags); /* Free cmd_rsp buf pool */ list_for_each_entry_safe(list_entry, tmp, @@ -20661,5 +20920,5 @@ lpfc_free_cmd_rsp_buf_per_hdwq(struct lpfc_hba *phba, kfree(list_entry); } - spin_unlock_irq(&hdwq->hdwq_lock); + spin_unlock_irqrestore(&hdwq->hdwq_lock, iflags); } diff --git a/drivers/scsi/lpfc/lpfc_sli.h b/drivers/scsi/lpfc/lpfc_sli.h index 37fbcb46387e..7bcf922a8be2 100644 --- a/drivers/scsi/lpfc/lpfc_sli.h +++ b/drivers/scsi/lpfc/lpfc_sli.h @@ -384,14 +384,13 @@ struct lpfc_io_buf { struct lpfc_nodelist *ndlp; uint32_t timeout; - uint16_t flags; /* TBD convert exch_busy to flags */ + uint16_t flags; #define LPFC_SBUF_XBUSY 0x1 /* SLI4 hba reported XB on WCQE cmpl */ #define LPFC_SBUF_BUMP_QDEPTH 0x2 /* bumped queue depth counter */ /* External DIF device IO conversions */ #define LPFC_SBUF_NORMAL_DIF 0x4 /* normal mode to insert/strip */ #define LPFC_SBUF_PASS_DIF 0x8 /* insert/strip mode to passthru */ #define LPFC_SBUF_NOT_POSTED 0x10 /* SGL failed post to FW. */ - uint16_t exch_busy; /* SLI4 hba reported XB on complete WCQE */ uint16_t status; /* From IOCB Word 7- ulpStatus */ uint32_t result; /* From IOCB Word 4. */ diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index 0d4882a9e634..d963ca871383 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -41,8 +41,13 @@ /* Multi-queue arrangement for FCP EQ/CQ/WQ tuples */ #define LPFC_HBA_HDWQ_MIN 0 -#define LPFC_HBA_HDWQ_MAX 128 -#define LPFC_HBA_HDWQ_DEF 0 +#define LPFC_HBA_HDWQ_MAX 256 +#define LPFC_HBA_HDWQ_DEF LPFC_HBA_HDWQ_MIN + +/* irq_chann range, values */ +#define LPFC_IRQ_CHANN_MIN 0 +#define LPFC_IRQ_CHANN_MAX 256 +#define LPFC_IRQ_CHANN_DEF LPFC_IRQ_CHANN_MIN /* FCP MQ queue count limiting */ #define LPFC_FCP_MQ_THRESHOLD_MIN 0 @@ -133,6 +138,23 @@ struct lpfc_rqb { struct lpfc_queue { struct list_head list; struct list_head wq_list; + + /* + * If interrupts are in effect on _all_ the eq's the footprint + * of polling code is zero (except mode). This memory is chec- + * ked for every io to see if the io needs to be polled and + * while completion to check if the eq's needs to be rearmed. + * Keep in same cacheline as the queue ptr to avoid cpu fetch + * stalls. Using 1B memory will leave us with 7B hole. Fill + * it with other frequently used members. + */ + uint16_t last_cpu; /* most recent cpu */ + uint16_t hdwq; + uint8_t qe_valid; + uint8_t mode; /* interrupt or polling */ +#define LPFC_EQ_INTERRUPT 0 +#define LPFC_EQ_POLL 1 + struct list_head wqfull_list; enum lpfc_sli4_queue_type type; enum lpfc_sli4_queue_subtype subtype; @@ -199,6 +221,7 @@ struct lpfc_queue { uint8_t q_flag; #define HBA_NVMET_WQFULL 0x1 /* We hit WQ Full condition for NVMET */ #define HBA_NVMET_CQ_NOTIFY 0x1 /* LPFC_NVMET_CQ_NOTIFY CQEs this EQE */ +#define HBA_EQ_DELAY_CHK 0x2 /* EQ is a candidate for coalescing */ #define LPFC_NVMET_CQ_NOTIFY 4 void __iomem *db_regaddr; uint16_t dpp_enable; @@ -239,10 +262,8 @@ struct lpfc_queue { struct delayed_work sched_spwork; uint64_t isr_timestamp; - uint16_t hdwq; - uint16_t last_cpu; /* most recent cpu */ - uint8_t qe_valid; struct lpfc_queue *assoc_qp; + struct list_head _poll_list; void **q_pgs; /* array to index entries per page */ }; @@ -451,11 +472,17 @@ struct lpfc_hba; #define LPFC_SLI4_HANDLER_NAME_SZ 16 struct lpfc_hba_eq_hdl { uint32_t idx; + uint16_t irq; char handler_name[LPFC_SLI4_HANDLER_NAME_SZ]; struct lpfc_hba *phba; struct lpfc_queue *eq; + struct cpumask aff_mask; }; +#define lpfc_get_eq_hdl(eqidx) (&phba->sli4_hba.hba_eq_hdl[eqidx]) +#define lpfc_get_aff_mask(eqidx) (&phba->sli4_hba.hba_eq_hdl[eqidx].aff_mask) +#define lpfc_get_irq(eqidx) (phba->sli4_hba.hba_eq_hdl[eqidx].irq) + /*BB Credit recovery value*/ struct lpfc_bbscn_params { uint32_t word0; @@ -513,6 +540,7 @@ struct lpfc_pc_sli4_params { uint8_t cqav; uint8_t wqsize; uint8_t bv1s; + uint8_t pls; #define LPFC_WQ_SZ64_SUPPORT 1 #define LPFC_WQ_SZ128_SUPPORT 2 uint8_t wqpcnt; @@ -544,11 +572,10 @@ struct lpfc_sli4_lnk_info { #define LPFC_SLI4_HANDLER_CNT (LPFC_HBA_IO_CHAN_MAX+ \ LPFC_FOF_IO_CHAN_NUM) -/* Used for IRQ vector to CPU mapping */ +/* Used for tracking CPU mapping attributes */ struct lpfc_vector_map_info { uint16_t phys_id; uint16_t core_id; - uint16_t irq; uint16_t eq; uint16_t hdwq; uint16_t flag; @@ -891,6 +918,7 @@ struct lpfc_sli4_hba { struct lpfc_vector_map_info *cpu_map; uint16_t num_possible_cpu; uint16_t num_present_cpu; + struct cpumask numa_mask; uint16_t curr_disp_cpu; struct lpfc_eq_intr_info __percpu *eq_info; uint32_t conf_trunk; diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h index b8aae31ffda3..9e5ff58edaca 100644 --- a/drivers/scsi/lpfc/lpfc_version.h +++ b/drivers/scsi/lpfc/lpfc_version.h @@ -20,7 +20,7 @@ * included with this package. * *******************************************************************/ -#define LPFC_DRIVER_VERSION "12.4.0.0" +#define LPFC_DRIVER_VERSION "12.6.0.2" #define LPFC_DRIVER_NAME "lpfc" /* Used for SLI 2/3 */ |