diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-12-02 13:37:02 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-12-02 13:37:02 -0800 |
commit | ef2cc88e2a205b8a11a19e78db63a70d3728cdf5 (patch) | |
tree | cf6a32c8b4875ef72cb90158f5220d3b150352b9 /drivers/scsi/lpfc/lpfc_init.c | |
parent | 937d6eefc716a9071f0e3bada19200de1bb9d048 (diff) | |
parent | 65309ef6b258f5a7b57c1033a82ba2aba5c434cc (diff) | |
download | lwn-ef2cc88e2a205b8a11a19e78db63a70d3728cdf5.tar.gz lwn-ef2cc88e2a205b8a11a19e78db63a70d3728cdf5.zip |
Merge tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi
Pull SCSI updates from James Bottomley:
"This is mostly an update of the usual drivers: aacraid, ufs, zfcp,
NCR5380, lpfc, qla2xxx, smartpqi, hisi_sas, target, mpt3sas, pm80xx
plus a whole load of minor updates and fixes.
The major core changes are Al Viro's reworking of sg's handling of
copy to/from user, Ming Lei's removal of the host busy counter to
avoid contention in the multiqueue case and Damien Le Moal's fixing of
residual tracking across error handling"
* tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi: (251 commits)
scsi: bnx2fc: timeout calculation invalid for bnx2fc_eh_abort()
scsi: target: core: Fix a pr_debug() argument
scsi: iscsi: Don't send data to unbound connection
scsi: target: iscsi: Wait for all commands to finish before freeing a session
scsi: target: core: Release SPC-2 reservations when closing a session
scsi: target: core: Document target_cmd_size_check()
scsi: bnx2i: fix potential use after free
Revert "scsi: qla2xxx: Fix memory leak when sending I/O fails"
scsi: NCR5380: Add disconnect_mask module parameter
scsi: NCR5380: Unconditionally clear ICR after do_abort()
scsi: NCR5380: Call scsi_set_resid() on command completion
scsi: scsi_debug: num_tgts must be >= 0
scsi: lpfc: use hdwq assigned cpu for allocation
scsi: arcmsr: fix indentation issues
scsi: qla4xxx: fix double free bug
scsi: pm80xx: Modified the logic to collect fatal dump
scsi: pm80xx: Tie the interrupt name to the module instance
scsi: pm80xx: Controller fatal error through sysfs
scsi: pm80xx: Do not request 12G sas speeds
scsi: pm80xx: Cleanup command when a reset times out
...
Diffstat (limited to 'drivers/scsi/lpfc/lpfc_init.c')
-rw-r--r-- | drivers/scsi/lpfc/lpfc_init.c | 954 |
1 files changed, 744 insertions, 210 deletions
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index e8813d26e594..dc6f7c4b54c6 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -40,6 +40,8 @@ #include <linux/irq.h> #include <linux/bitops.h> #include <linux/crash_dump.h> +#include <linux/cpu.h> +#include <linux/cpuhotplug.h> #include <scsi/scsi.h> #include <scsi/scsi_device.h> @@ -66,9 +68,13 @@ #include "lpfc_version.h" #include "lpfc_ids.h" +static enum cpuhp_state lpfc_cpuhp_state; /* Used when mapping IRQ vectors in a driver centric manner */ static uint32_t lpfc_present_cpu; +static void __lpfc_cpuhp_remove(struct lpfc_hba *phba); +static void lpfc_cpuhp_remove(struct lpfc_hba *phba); +static void lpfc_cpuhp_add(struct lpfc_hba *phba); static void lpfc_get_hba_model_desc(struct lpfc_hba *, uint8_t *, uint8_t *); static int lpfc_post_rcv_buf(struct lpfc_hba *); static int lpfc_sli4_queue_verify(struct lpfc_hba *); @@ -1235,10 +1241,9 @@ lpfc_hb_eq_delay_work(struct work_struct *work) struct lpfc_hba, eq_delay_work); struct lpfc_eq_intr_info *eqi, *eqi_new; struct lpfc_queue *eq, *eq_next; - unsigned char *eqcnt = NULL; + unsigned char *ena_delay = NULL; uint32_t usdelay; int i; - bool update = false; if (!phba->cfg_auto_imax || phba->pport->load_flag & FC_UNLOADING) return; @@ -1247,44 +1252,36 @@ lpfc_hb_eq_delay_work(struct work_struct *work) phba->pport->fc_flag & FC_OFFLINE_MODE) goto requeue; - eqcnt = kcalloc(num_possible_cpus(), sizeof(unsigned char), - GFP_KERNEL); - if (!eqcnt) + ena_delay = kcalloc(phba->sli4_hba.num_possible_cpu, sizeof(*ena_delay), + GFP_KERNEL); + if (!ena_delay) goto requeue; - if (phba->cfg_irq_chann > 1) { - /* Loop thru all IRQ vectors */ - for (i = 0; i < phba->cfg_irq_chann; i++) { - /* Get the EQ corresponding to the IRQ vector */ - eq = phba->sli4_hba.hba_eq_hdl[i].eq; - if (!eq) - continue; - if (eq->q_mode) { - update = true; - break; - } - if (eqcnt[eq->last_cpu] < 2) - eqcnt[eq->last_cpu]++; + for (i = 0; i 
< phba->cfg_irq_chann; i++) { + /* Get the EQ corresponding to the IRQ vector */ + eq = phba->sli4_hba.hba_eq_hdl[i].eq; + if (!eq) + continue; + if (eq->q_mode || eq->q_flag & HBA_EQ_DELAY_CHK) { + eq->q_flag &= ~HBA_EQ_DELAY_CHK; + ena_delay[eq->last_cpu] = 1; } - } else - update = true; + } for_each_present_cpu(i) { eqi = per_cpu_ptr(phba->sli4_hba.eq_info, i); - if (!update && eqcnt[i] < 2) { - eqi->icnt = 0; - continue; + if (ena_delay[i]) { + usdelay = (eqi->icnt >> 10) * LPFC_EQ_DELAY_STEP; + if (usdelay > LPFC_MAX_AUTO_EQ_DELAY) + usdelay = LPFC_MAX_AUTO_EQ_DELAY; + } else { + usdelay = 0; } - usdelay = (eqi->icnt / LPFC_IMAX_THRESHOLD) * - LPFC_EQ_DELAY_STEP; - if (usdelay > LPFC_MAX_AUTO_EQ_DELAY) - usdelay = LPFC_MAX_AUTO_EQ_DELAY; - eqi->icnt = 0; list_for_each_entry_safe(eq, eq_next, &eqi->list, cpu_list) { - if (eq->last_cpu != i) { + if (unlikely(eq->last_cpu != i)) { eqi_new = per_cpu_ptr(phba->sli4_hba.eq_info, eq->last_cpu); list_move_tail(&eq->cpu_list, &eqi_new->list); @@ -1296,7 +1293,7 @@ lpfc_hb_eq_delay_work(struct work_struct *work) } } - kfree(eqcnt); + kfree(ena_delay); requeue: queue_delayed_work(phba->wq, &phba->eq_delay_work, @@ -3053,11 +3050,12 @@ lpfc_sli4_node_prep(struct lpfc_hba *phba) continue; } ndlp->nlp_rpi = rpi; - lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE, - "0009 rpi:%x DID:%x " - "flg:%x map:%x x%px\n", ndlp->nlp_rpi, - ndlp->nlp_DID, ndlp->nlp_flag, - ndlp->nlp_usg_map, ndlp); + lpfc_printf_vlog(ndlp->vport, KERN_INFO, + LOG_NODE | LOG_DISCOVERY, + "0009 Assign RPI x%x to ndlp x%px " + "DID:x%06x flg:x%x map:x%x\n", + ndlp->nlp_rpi, ndlp, ndlp->nlp_DID, + ndlp->nlp_flag, ndlp->nlp_usg_map); } } lpfc_destroy_vport_work_array(phba, vports); @@ -3387,6 +3385,8 @@ lpfc_online(struct lpfc_hba *phba) if (phba->cfg_xri_rebalancing) lpfc_create_multixri_pools(phba); + lpfc_cpuhp_add(phba); + lpfc_unblock_mgmt_io(phba); return 0; } @@ -3453,10 +3453,15 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action) 
list_for_each_entry_safe(ndlp, next_ndlp, &vports[i]->fc_nodes, nlp_listp) { - if (!NLP_CHK_NODE_ACT(ndlp)) - continue; - if (ndlp->nlp_state == NLP_STE_UNUSED_NODE) + if ((!NLP_CHK_NODE_ACT(ndlp)) || + ndlp->nlp_state == NLP_STE_UNUSED_NODE) { + /* Driver must assume RPI is invalid for + * any unused or inactive node. + */ + ndlp->nlp_rpi = LPFC_RPI_ALLOC_ERROR; continue; + } + if (ndlp->nlp_type & NLP_FABRIC) { lpfc_disc_state_machine(vports[i], ndlp, NULL, NLP_EVT_DEVICE_RECOVERY); @@ -3472,16 +3477,16 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action) * comes back online. */ if (phba->sli_rev == LPFC_SLI_REV4) { - lpfc_printf_vlog(ndlp->vport, - KERN_INFO, LOG_NODE, - "0011 lpfc_offline: " - "ndlp:x%px did %x " - "usgmap:x%x rpi:%x\n", - ndlp, ndlp->nlp_DID, - ndlp->nlp_usg_map, - ndlp->nlp_rpi); - + lpfc_printf_vlog(ndlp->vport, KERN_INFO, + LOG_NODE | LOG_DISCOVERY, + "0011 Free RPI x%x on " + "ndlp:x%px did x%x " + "usgmap:x%x\n", + ndlp->nlp_rpi, ndlp, + ndlp->nlp_DID, + ndlp->nlp_usg_map); lpfc_sli4_free_rpi(phba, ndlp->nlp_rpi); + ndlp->nlp_rpi = LPFC_RPI_ALLOC_ERROR; } lpfc_unreg_rpi(vports[i], ndlp); } @@ -3545,6 +3550,7 @@ lpfc_offline(struct lpfc_hba *phba) spin_unlock_irq(shost->host_lock); } lpfc_destroy_vport_work_array(phba, vports); + __lpfc_cpuhp_remove(phba); if (phba->cfg_xri_rebalancing) lpfc_destroy_multixri_pools(phba); @@ -5283,10 +5289,10 @@ lpfc_sli4_async_sli_evt(struct lpfc_hba *phba, struct lpfc_acqe_sli *acqe_sli) evt_type = bf_get(lpfc_trailer_type, acqe_sli); lpfc_printf_log(phba, KERN_INFO, LOG_SLI, - "2901 Async SLI event - Event Data1:x%08x Event Data2:" - "x%08x SLI Event Type:%d\n", + "2901 Async SLI event - Type:%d, Event Data: x%08x " + "x%08x x%08x x%08x\n", evt_type, acqe_sli->event_data1, acqe_sli->event_data2, - evt_type); + acqe_sli->reserved, acqe_sli->trailer); port_name = phba->Port[0]; if (port_name == 0x00) @@ -5433,11 +5439,26 @@ lpfc_sli4_async_sli_evt(struct lpfc_hba *phba, struct lpfc_acqe_sli 
*acqe_sli) "Event Data1:x%08x Event Data2: x%08x\n", acqe_sli->event_data1, acqe_sli->event_data2); break; + case LPFC_SLI_EVENT_TYPE_MISCONF_FAWWN: + /* Misconfigured WWN. Reports that the SLI Port is configured + * to use FA-WWN, but the attached device doesn’t support it. + * No driver action is required. + * Event Data1 - N.A, Event Data2 - N.A + */ + lpfc_log_msg(phba, KERN_WARNING, LOG_SLI, + "2699 Misconfigured FA-WWN - Attached device does " + "not support FA-WWN\n"); + break; + case LPFC_SLI_EVENT_TYPE_EEPROM_FAILURE: + /* EEPROM failure. No driver action is required */ + lpfc_printf_log(phba, KERN_WARNING, LOG_SLI, + "2518 EEPROM failure - " + "Event Data1: x%08x Event Data2: x%08x\n", + acqe_sli->event_data1, acqe_sli->event_data2); + break; default: lpfc_printf_log(phba, KERN_INFO, LOG_SLI, - "3193 Async SLI event - Event Data1:x%08x Event Data2:" - "x%08x SLI Event Type:%d\n", - acqe_sli->event_data1, acqe_sli->event_data2, + "3193 Unrecognized SLI event, type: 0x%x", evt_type); break; } @@ -5976,6 +5997,29 @@ static void lpfc_log_intr_mode(struct lpfc_hba *phba, uint32_t intr_mode) } /** + * lpfc_cpumask_of_node_init - initalizes cpumask of phba's NUMA node + * @phba: Pointer to HBA context object. + * + **/ +static void +lpfc_cpumask_of_node_init(struct lpfc_hba *phba) +{ + unsigned int cpu, numa_node; + struct cpumask *numa_mask = &phba->sli4_hba.numa_mask; + + cpumask_clear(numa_mask); + + /* Check if we're a NUMA architecture */ + numa_node = dev_to_node(&phba->pcidev->dev); + if (numa_node == NUMA_NO_NODE) + return; + + for_each_possible_cpu(cpu) + if (cpu_to_node(cpu) == numa_node) + cpumask_set_cpu(cpu, numa_mask); +} + +/** * lpfc_enable_pci_dev - Enable a generic PCI device. * @phba: pointer to lpfc hba data structure. 
* @@ -6418,6 +6462,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) phba->sli4_hba.num_present_cpu = lpfc_present_cpu; phba->sli4_hba.num_possible_cpu = num_possible_cpus(); phba->sli4_hba.curr_disp_cpu = 0; + lpfc_cpumask_of_node_init(phba); /* Get all the module params for configuring this host */ lpfc_get_cfgparam(phba); @@ -6953,6 +6998,7 @@ lpfc_sli4_driver_resource_unset(struct lpfc_hba *phba) phba->sli4_hba.num_possible_cpu = 0; phba->sli4_hba.num_present_cpu = 0; phba->sli4_hba.curr_disp_cpu = 0; + cpumask_clear(&phba->sli4_hba.numa_mask); /* Free memory allocated for fast-path work queue handles */ kfree(phba->sli4_hba.hba_eq_hdl); @@ -7126,7 +7172,7 @@ lpfc_init_iocb_list(struct lpfc_hba *phba, int iocb_count) if (iocbq_entry == NULL) { printk(KERN_ERR "%s: only allocated %d iocbs of " "expected %d count. Unloading driver.\n", - __func__, i, LPFC_IOCB_LIST_CNT); + __func__, i, iocb_count); goto out_free_iocbq; } @@ -7545,18 +7591,10 @@ lpfc_create_shost(struct lpfc_hba *phba) if (phba->nvmet_support) { /* Only 1 vport (pport) will support NVME target */ - if (phba->txrdy_payload_pool == NULL) { - phba->txrdy_payload_pool = dma_pool_create( - "txrdy_pool", &phba->pcidev->dev, - TXRDY_PAYLOAD_LEN, 16, 0); - if (phba->txrdy_payload_pool) { - phba->targetport = NULL; - phba->cfg_enable_fc4_type = LPFC_ENABLE_NVME; - lpfc_printf_log(phba, KERN_INFO, - LOG_INIT | LOG_NVME_DISC, - "6076 NVME Target Found\n"); - } - } + phba->targetport = NULL; + phba->cfg_enable_fc4_type = LPFC_ENABLE_NVME; + lpfc_printf_log(phba, KERN_INFO, LOG_INIT | LOG_NVME_DISC, + "6076 NVME Target Found\n"); } lpfc_debugfs_initialize(vport); @@ -8235,6 +8273,94 @@ lpfc_destroy_bootstrap_mbox(struct lpfc_hba *phba) memset(&phba->sli4_hba.bmbx, 0, sizeof(struct lpfc_bmbx)); } +static const char * const lpfc_topo_to_str[] = { + "Loop then P2P", + "Loopback", + "P2P Only", + "Unsupported", + "Loop Only", + "Unsupported", + "P2P then Loop", +}; + +/** + * lpfc_map_topology - Map the 
topology read from READ_CONFIG + * @phba: pointer to lpfc hba data structure. + * @rdconf: pointer to read config data + * + * This routine is invoked to map the topology values as read + * from the read config mailbox command. If the persistent + * topology feature is supported, the firmware will provide the + * saved topology information to be used in INIT_LINK + * + **/ +#define LINK_FLAGS_DEF 0x0 +#define LINK_FLAGS_P2P 0x1 +#define LINK_FLAGS_LOOP 0x2 +static void +lpfc_map_topology(struct lpfc_hba *phba, struct lpfc_mbx_read_config *rd_config) +{ + u8 ptv, tf, pt; + + ptv = bf_get(lpfc_mbx_rd_conf_ptv, rd_config); + tf = bf_get(lpfc_mbx_rd_conf_tf, rd_config); + pt = bf_get(lpfc_mbx_rd_conf_pt, rd_config); + + lpfc_printf_log(phba, KERN_INFO, LOG_SLI, + "2027 Read Config Data : ptv:0x%x, tf:0x%x pt:0x%x", + ptv, tf, pt); + if (!ptv) { + lpfc_printf_log(phba, KERN_WARNING, LOG_SLI, + "2019 FW does not support persistent topology " + "Using driver parameter defined value [%s]", + lpfc_topo_to_str[phba->cfg_topology]); + return; + } + /* FW supports persistent topology - override module parameter value */ + phba->hba_flag |= HBA_PERSISTENT_TOPO; + switch (phba->pcidev->device) { + case PCI_DEVICE_ID_LANCER_G7_FC: + if (tf || (pt == LINK_FLAGS_LOOP)) { + /* Invalid values from FW - use driver params */ + phba->hba_flag &= ~HBA_PERSISTENT_TOPO; + } else { + /* Prism only supports PT2PT topology */ + phba->cfg_topology = FLAGS_TOPOLOGY_MODE_PT_PT; + } + break; + case PCI_DEVICE_ID_LANCER_G6_FC: + if (!tf) { + phba->cfg_topology = ((pt == LINK_FLAGS_LOOP) + ? FLAGS_TOPOLOGY_MODE_LOOP + : FLAGS_TOPOLOGY_MODE_PT_PT); + } else { + phba->hba_flag &= ~HBA_PERSISTENT_TOPO; + } + break; + default: /* G5 */ + if (tf) { + /* If topology failover set - pt is '0' or '1' */ + phba->cfg_topology = (pt ? FLAGS_TOPOLOGY_MODE_PT_LOOP : + FLAGS_TOPOLOGY_MODE_LOOP_PT); + } else { + phba->cfg_topology = ((pt == LINK_FLAGS_P2P) + ? 
FLAGS_TOPOLOGY_MODE_PT_PT + : FLAGS_TOPOLOGY_MODE_LOOP); + } + break; + } + if (phba->hba_flag & HBA_PERSISTENT_TOPO) { + lpfc_printf_log(phba, KERN_INFO, LOG_SLI, + "2020 Using persistent topology value [%s]", + lpfc_topo_to_str[phba->cfg_topology]); + } else { + lpfc_printf_log(phba, KERN_WARNING, LOG_SLI, + "2021 Invalid topology values from FW " + "Using driver parameter defined value [%s]", + lpfc_topo_to_str[phba->cfg_topology]); + } +} + /** * lpfc_sli4_read_config - Get the config parameters. * @phba: pointer to lpfc hba data structure. @@ -8346,6 +8472,7 @@ lpfc_sli4_read_config(struct lpfc_hba *phba) phba->max_vpi = (phba->sli4_hba.max_cfg_param.max_vpi > 0) ? (phba->sli4_hba.max_cfg_param.max_vpi - 1) : 0; phba->max_vports = phba->max_vpi; + lpfc_map_topology(phba, rd_config); lpfc_printf_log(phba, KERN_INFO, LOG_SLI, "2003 cfg params Extents? %d " "XRI(B:%d M:%d), " @@ -8619,8 +8746,8 @@ lpfc_sli4_queue_verify(struct lpfc_hba *phba) */ if (phba->nvmet_support) { - if (phba->cfg_irq_chann < phba->cfg_nvmet_mrq) - phba->cfg_nvmet_mrq = phba->cfg_irq_chann; + if (phba->cfg_hdw_queue < phba->cfg_nvmet_mrq) + phba->cfg_nvmet_mrq = phba->cfg_hdw_queue; if (phba->cfg_nvmet_mrq > LPFC_NVMET_MRQ_MAX) phba->cfg_nvmet_mrq = LPFC_NVMET_MRQ_MAX; } @@ -9160,6 +9287,8 @@ lpfc_sli4_queue_destroy(struct lpfc_hba *phba) } spin_unlock_irq(&phba->hbalock); + lpfc_sli4_cleanup_poll_list(phba); + /* Release HBA eqs */ if (phba->sli4_hba.hdwq) lpfc_sli4_release_hdwq(phba); @@ -10581,7 +10710,6 @@ lpfc_find_cpu_handle(struct lpfc_hba *phba, uint16_t id, int match) */ if ((match == LPFC_FIND_BY_EQ) && (cpup->flag & LPFC_CPU_FIRST_IRQ) && - (cpup->irq != LPFC_VECTOR_MAP_EMPTY) && (cpup->eq == id)) return cpu; @@ -10619,6 +10747,75 @@ lpfc_find_hyper(struct lpfc_hba *phba, int cpu, } #endif +/* + * lpfc_assign_eq_map_info - Assigns eq for vector_map structure + * @phba: pointer to lpfc hba data structure. 
+ * @eqidx: index for eq and irq vector + * @flag: flags to set for vector_map structure + * @cpu: cpu used to index vector_map structure + * + * The routine assigns eq info into vector_map structure + */ +static inline void +lpfc_assign_eq_map_info(struct lpfc_hba *phba, uint16_t eqidx, uint16_t flag, + unsigned int cpu) +{ + struct lpfc_vector_map_info *cpup = &phba->sli4_hba.cpu_map[cpu]; + struct lpfc_hba_eq_hdl *eqhdl = lpfc_get_eq_hdl(eqidx); + + cpup->eq = eqidx; + cpup->flag |= flag; + + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "3336 Set Affinity: CPU %d irq %d eq %d flag x%x\n", + cpu, eqhdl->irq, cpup->eq, cpup->flag); +} + +/** + * lpfc_cpu_map_array_init - Initialize cpu_map structure + * @phba: pointer to lpfc hba data structure. + * + * The routine initializes the cpu_map array structure + */ +static void +lpfc_cpu_map_array_init(struct lpfc_hba *phba) +{ + struct lpfc_vector_map_info *cpup; + struct lpfc_eq_intr_info *eqi; + int cpu; + + for_each_possible_cpu(cpu) { + cpup = &phba->sli4_hba.cpu_map[cpu]; + cpup->phys_id = LPFC_VECTOR_MAP_EMPTY; + cpup->core_id = LPFC_VECTOR_MAP_EMPTY; + cpup->hdwq = LPFC_VECTOR_MAP_EMPTY; + cpup->eq = LPFC_VECTOR_MAP_EMPTY; + cpup->flag = 0; + eqi = per_cpu_ptr(phba->sli4_hba.eq_info, cpu); + INIT_LIST_HEAD(&eqi->list); + eqi->icnt = 0; + } +} + +/** + * lpfc_hba_eq_hdl_array_init - Initialize hba_eq_hdl structure + * @phba: pointer to lpfc hba data structure. + * + * The routine initializes the hba_eq_hdl array structure + */ +static void +lpfc_hba_eq_hdl_array_init(struct lpfc_hba *phba) +{ + struct lpfc_hba_eq_hdl *eqhdl; + int i; + + for (i = 0; i < phba->cfg_irq_chann; i++) { + eqhdl = lpfc_get_eq_hdl(i); + eqhdl->irq = LPFC_VECTOR_MAP_EMPTY; + eqhdl->phba = phba; + } +} + /** * lpfc_cpu_affinity_check - Check vector CPU affinity mappings * @phba: pointer to lpfc hba data structure. 
@@ -10637,22 +10834,10 @@ lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors) int max_core_id, min_core_id; struct lpfc_vector_map_info *cpup; struct lpfc_vector_map_info *new_cpup; - const struct cpumask *maskp; #ifdef CONFIG_X86 struct cpuinfo_x86 *cpuinfo; #endif - /* Init cpu_map array */ - for_each_possible_cpu(cpu) { - cpup = &phba->sli4_hba.cpu_map[cpu]; - cpup->phys_id = LPFC_VECTOR_MAP_EMPTY; - cpup->core_id = LPFC_VECTOR_MAP_EMPTY; - cpup->hdwq = LPFC_VECTOR_MAP_EMPTY; - cpup->eq = LPFC_VECTOR_MAP_EMPTY; - cpup->irq = LPFC_VECTOR_MAP_EMPTY; - cpup->flag = 0; - } - max_phys_id = 0; min_phys_id = LPFC_VECTOR_MAP_EMPTY; max_core_id = 0; @@ -10688,65 +10873,6 @@ lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors) min_core_id = cpup->core_id; } - for_each_possible_cpu(i) { - struct lpfc_eq_intr_info *eqi = - per_cpu_ptr(phba->sli4_hba.eq_info, i); - - INIT_LIST_HEAD(&eqi->list); - eqi->icnt = 0; - } - - /* This loop sets up all CPUs that are affinitized with a - * irq vector assigned to the driver. All affinitized CPUs - * will get a link to that vectors IRQ and EQ. - * - * NULL affinity mask handling: - * If irq count is greater than one, log an error message. - * If the null mask is received for the first irq, find the - * first present cpu, and assign the eq index to ensure at - * least one EQ is assigned. 
- */ - for (idx = 0; idx < phba->cfg_irq_chann; idx++) { - /* Get a CPU mask for all CPUs affinitized to this vector */ - maskp = pci_irq_get_affinity(phba->pcidev, idx); - if (!maskp) { - if (phba->cfg_irq_chann > 1) - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "3329 No affinity mask found " - "for vector %d (%d)\n", - idx, phba->cfg_irq_chann); - if (!idx) { - cpu = cpumask_first(cpu_present_mask); - cpup = &phba->sli4_hba.cpu_map[cpu]; - cpup->eq = idx; - cpup->irq = pci_irq_vector(phba->pcidev, idx); - cpup->flag |= LPFC_CPU_FIRST_IRQ; - } - break; - } - - i = 0; - /* Loop through all CPUs associated with vector idx */ - for_each_cpu_and(cpu, maskp, cpu_present_mask) { - /* Set the EQ index and IRQ for that vector */ - cpup = &phba->sli4_hba.cpu_map[cpu]; - cpup->eq = idx; - cpup->irq = pci_irq_vector(phba->pcidev, idx); - - /* If this is the first CPU thats assigned to this - * vector, set LPFC_CPU_FIRST_IRQ. - */ - if (!i) - cpup->flag |= LPFC_CPU_FIRST_IRQ; - i++; - - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "3336 Set Affinity: CPU %d " - "irq %d eq %d flag x%x\n", - cpu, cpup->irq, cpup->eq, cpup->flag); - } - } - /* After looking at each irq vector assigned to this pcidev, its * possible to see that not ALL CPUs have been accounted for. 
* Next we will set any unassigned (unaffinitized) cpu map @@ -10772,7 +10898,7 @@ lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors) for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) { new_cpup = &phba->sli4_hba.cpu_map[new_cpu]; if (!(new_cpup->flag & LPFC_CPU_MAP_UNASSIGN) && - (new_cpup->irq != LPFC_VECTOR_MAP_EMPTY) && + (new_cpup->eq != LPFC_VECTOR_MAP_EMPTY) && (new_cpup->phys_id == cpup->phys_id)) goto found_same; new_cpu = cpumask_next( @@ -10785,7 +10911,6 @@ lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors) found_same: /* We found a matching phys_id, so copy the IRQ info */ cpup->eq = new_cpup->eq; - cpup->irq = new_cpup->irq; /* Bump start_cpu to the next slot to minmize the * chance of having multiple unassigned CPU entries @@ -10797,9 +10922,10 @@ found_same: lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "3337 Set Affinity: CPU %d " - "irq %d from id %d same " + "eq %d from peer cpu %d same " "phys_id (%d)\n", - cpu, cpup->irq, new_cpu, cpup->phys_id); + cpu, cpup->eq, new_cpu, + cpup->phys_id); } } @@ -10823,7 +10949,7 @@ found_same: for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) { new_cpup = &phba->sli4_hba.cpu_map[new_cpu]; if (!(new_cpup->flag & LPFC_CPU_MAP_UNASSIGN) && - (new_cpup->irq != LPFC_VECTOR_MAP_EMPTY)) + (new_cpup->eq != LPFC_VECTOR_MAP_EMPTY)) goto found_any; new_cpu = cpumask_next( new_cpu, cpu_present_mask); @@ -10833,13 +10959,12 @@ found_same: /* We should never leave an entry unassigned */ lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "3339 Set Affinity: CPU %d " - "irq %d UNASSIGNED\n", - cpup->hdwq, cpup->irq); + "eq %d UNASSIGNED\n", + cpup->hdwq, cpup->eq); continue; found_any: /* We found an available entry, copy the IRQ info */ cpup->eq = new_cpup->eq; - cpup->irq = new_cpup->irq; /* Bump start_cpu to the next slot to minmize the * chance of having multiple unassigned CPU entries @@ -10851,8 +10976,8 @@ found_any: lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "3338 Set Affinity: CPU %d " - "irq %d from id 
%d (%d/%d)\n", - cpu, cpup->irq, new_cpu, + "eq %d from peer cpu %d (%d/%d)\n", + cpu, cpup->eq, new_cpu, new_cpup->phys_id, new_cpup->core_id); } } @@ -10873,11 +10998,11 @@ found_any: idx++; lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "3333 Set Affinity: CPU %d (phys %d core %d): " - "hdwq %d eq %d irq %d flg x%x\n", + "hdwq %d eq %d flg x%x\n", cpu, cpup->phys_id, cpup->core_id, - cpup->hdwq, cpup->eq, cpup->irq, cpup->flag); + cpup->hdwq, cpup->eq, cpup->flag); } - /* Finally we need to associate a hdwq with each cpu_map entry + /* Associate a hdwq with each cpu_map entry * This will be 1 to 1 - hdwq to cpu, unless there are less * hardware queues then CPUs. For that case we will just round-robin * the available hardware queues as they get assigned to CPUs. @@ -10951,9 +11076,26 @@ found_any: logit: lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "3335 Set Affinity: CPU %d (phys %d core %d): " - "hdwq %d eq %d irq %d flg x%x\n", + "hdwq %d eq %d flg x%x\n", cpu, cpup->phys_id, cpup->core_id, - cpup->hdwq, cpup->eq, cpup->irq, cpup->flag); + cpup->hdwq, cpup->eq, cpup->flag); + } + + /* + * Initialize the cpu_map slots for not-present cpus in case + * a cpu is hot-added. Perform a simple hdwq round robin assignment. + */ + idx = 0; + for_each_possible_cpu(cpu) { + cpup = &phba->sli4_hba.cpu_map[cpu]; + if (cpup->hdwq != LPFC_VECTOR_MAP_EMPTY) + continue; + + cpup->hdwq = idx++ % phba->cfg_hdw_queue; + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "3340 Set Affinity: not present " + "CPU %d hdwq %d\n", + cpu, cpup->hdwq); } /* The cpu_map array will be used later during initialization @@ -10963,11 +11105,280 @@ found_any: } /** + * lpfc_cpuhp_get_eq + * + * @phba: pointer to lpfc hba data structure. 
+ * @cpu: cpu going offline + * @eqlist: + */ +static void +lpfc_cpuhp_get_eq(struct lpfc_hba *phba, unsigned int cpu, + struct list_head *eqlist) +{ + const struct cpumask *maskp; + struct lpfc_queue *eq; + cpumask_t tmp; + u16 idx; + + for (idx = 0; idx < phba->cfg_irq_chann; idx++) { + maskp = pci_irq_get_affinity(phba->pcidev, idx); + if (!maskp) + continue; + /* + * if irq is not affinitized to the cpu going + * then we don't need to poll the eq attached + * to it. + */ + if (!cpumask_and(&tmp, maskp, cpumask_of(cpu))) + continue; + /* get the cpus that are online and are affini- + * tized to this irq vector. If the count is + * more than 1 then cpuhp is not going to shut- + * down this vector. Since this cpu has not + * gone offline yet, we need >1. + */ + cpumask_and(&tmp, maskp, cpu_online_mask); + if (cpumask_weight(&tmp) > 1) + continue; + + /* Now that we have an irq to shutdown, get the eq + * mapped to this irq. Note: multiple hdwq's in + * the software can share an eq, but eventually + * only eq will be mapped to this vector + */ + eq = phba->sli4_hba.hba_eq_hdl[idx].eq; + list_add(&eq->_poll_list, eqlist); + } +} + +static void __lpfc_cpuhp_remove(struct lpfc_hba *phba) +{ + if (phba->sli_rev != LPFC_SLI_REV4) + return; + + cpuhp_state_remove_instance_nocalls(lpfc_cpuhp_state, + &phba->cpuhp); + /* + * unregistering the instance doesn't stop the polling + * timer. Wait for the poll timer to retire. 
+ */ + synchronize_rcu(); + del_timer_sync(&phba->cpuhp_poll_timer); +} + +static void lpfc_cpuhp_remove(struct lpfc_hba *phba) +{ + if (phba->pport->fc_flag & FC_OFFLINE_MODE) + return; + + __lpfc_cpuhp_remove(phba); +} + +static void lpfc_cpuhp_add(struct lpfc_hba *phba) +{ + if (phba->sli_rev != LPFC_SLI_REV4) + return; + + rcu_read_lock(); + + if (!list_empty(&phba->poll_list)) { + timer_setup(&phba->cpuhp_poll_timer, lpfc_sli4_poll_hbtimer, 0); + mod_timer(&phba->cpuhp_poll_timer, + jiffies + msecs_to_jiffies(LPFC_POLL_HB)); + } + + rcu_read_unlock(); + + cpuhp_state_add_instance_nocalls(lpfc_cpuhp_state, + &phba->cpuhp); +} + +static int __lpfc_cpuhp_checks(struct lpfc_hba *phba, int *retval) +{ + if (phba->pport->load_flag & FC_UNLOADING) { + *retval = -EAGAIN; + return true; + } + + if (phba->sli_rev != LPFC_SLI_REV4) { + *retval = 0; + return true; + } + + /* proceed with the hotplug */ + return false; +} + +/** + * lpfc_irq_set_aff - set IRQ affinity + * @eqhdl: EQ handle + * @cpu: cpu to set affinity + * + **/ +static inline void +lpfc_irq_set_aff(struct lpfc_hba_eq_hdl *eqhdl, unsigned int cpu) +{ + cpumask_clear(&eqhdl->aff_mask); + cpumask_set_cpu(cpu, &eqhdl->aff_mask); + irq_set_status_flags(eqhdl->irq, IRQ_NO_BALANCING); + irq_set_affinity_hint(eqhdl->irq, &eqhdl->aff_mask); +} + +/** + * lpfc_irq_clear_aff - clear IRQ affinity + * @eqhdl: EQ handle + * + **/ +static inline void +lpfc_irq_clear_aff(struct lpfc_hba_eq_hdl *eqhdl) +{ + cpumask_clear(&eqhdl->aff_mask); + irq_clear_status_flags(eqhdl->irq, IRQ_NO_BALANCING); + irq_set_affinity_hint(eqhdl->irq, &eqhdl->aff_mask); +} + +/** + * lpfc_irq_rebalance - rebalances IRQ affinity according to cpuhp event + * @phba: pointer to HBA context object. + * @cpu: cpu going offline/online + * @offline: true, cpu is going offline. false, cpu is coming online. 
+ * + * If cpu is going offline, we'll try our best effort to find the next + * online cpu on the phba's NUMA node and migrate all offlining IRQ affinities. + * + * If cpu is coming online, reaffinitize the IRQ back to the onlineng cpu. + * + * Note: Call only if cfg_irq_numa is enabled, otherwise rely on + * PCI_IRQ_AFFINITY to auto-manage IRQ affinity. + * + **/ +static void +lpfc_irq_rebalance(struct lpfc_hba *phba, unsigned int cpu, bool offline) +{ + struct lpfc_vector_map_info *cpup; + struct cpumask *aff_mask; + unsigned int cpu_select, cpu_next, idx; + const struct cpumask *numa_mask; + + if (!phba->cfg_irq_numa) + return; + + numa_mask = &phba->sli4_hba.numa_mask; + + if (!cpumask_test_cpu(cpu, numa_mask)) + return; + + cpup = &phba->sli4_hba.cpu_map[cpu]; + + if (!(cpup->flag & LPFC_CPU_FIRST_IRQ)) + return; + + if (offline) { + /* Find next online CPU on NUMA node */ + cpu_next = cpumask_next_wrap(cpu, numa_mask, cpu, true); + cpu_select = lpfc_next_online_numa_cpu(numa_mask, cpu_next); + + /* Found a valid CPU */ + if ((cpu_select < nr_cpu_ids) && (cpu_select != cpu)) { + /* Go through each eqhdl and ensure offlining + * cpu aff_mask is migrated + */ + for (idx = 0; idx < phba->cfg_irq_chann; idx++) { + aff_mask = lpfc_get_aff_mask(idx); + + /* Migrate affinity */ + if (cpumask_test_cpu(cpu, aff_mask)) + lpfc_irq_set_aff(lpfc_get_eq_hdl(idx), + cpu_select); + } + } else { + /* Rely on irqbalance if no online CPUs left on NUMA */ + for (idx = 0; idx < phba->cfg_irq_chann; idx++) + lpfc_irq_clear_aff(lpfc_get_eq_hdl(idx)); + } + } else { + /* Migrate affinity back to this CPU */ + lpfc_irq_set_aff(lpfc_get_eq_hdl(cpup->eq), cpu); + } +} + +static int lpfc_cpu_offline(unsigned int cpu, struct hlist_node *node) +{ + struct lpfc_hba *phba = hlist_entry_safe(node, struct lpfc_hba, cpuhp); + struct lpfc_queue *eq, *next; + LIST_HEAD(eqlist); + int retval; + + if (!phba) { + WARN_ONCE(!phba, "cpu: %u. 
phba:NULL", raw_smp_processor_id()); + return 0; + } + + if (__lpfc_cpuhp_checks(phba, &retval)) + return retval; + + lpfc_irq_rebalance(phba, cpu, true); + + lpfc_cpuhp_get_eq(phba, cpu, &eqlist); + + /* start polling on these eq's */ + list_for_each_entry_safe(eq, next, &eqlist, _poll_list) { + list_del_init(&eq->_poll_list); + lpfc_sli4_start_polling(eq); + } + + return 0; +} + +static int lpfc_cpu_online(unsigned int cpu, struct hlist_node *node) +{ + struct lpfc_hba *phba = hlist_entry_safe(node, struct lpfc_hba, cpuhp); + struct lpfc_queue *eq, *next; + unsigned int n; + int retval; + + if (!phba) { + WARN_ONCE(!phba, "cpu: %u. phba:NULL", raw_smp_processor_id()); + return 0; + } + + if (__lpfc_cpuhp_checks(phba, &retval)) + return retval; + + lpfc_irq_rebalance(phba, cpu, false); + + list_for_each_entry_safe(eq, next, &phba->poll_list, _poll_list) { + n = lpfc_find_cpu_handle(phba, eq->hdwq, LPFC_FIND_BY_HDWQ); + if (n == cpu) + lpfc_sli4_stop_polling(eq); + } + + return 0; +} + +/** * lpfc_sli4_enable_msix - Enable MSI-X interrupt mode to SLI-4 device * @phba: pointer to lpfc hba data structure. * * This routine is invoked to enable the MSI-X interrupt vectors to device - * with SLI-4 interface spec. + * with SLI-4 interface spec. It also allocates MSI-X vectors and maps them + * to cpus on the system. + * + * When cfg_irq_numa is enabled, the adapter will only allocate vectors for + * the number of cpus on the same numa node as this adapter. The vectors are + * allocated without requesting OS affinity mapping. A vector will be + * allocated and assigned to each online and offline cpu. If the cpu is + * online, then affinity will be set to that cpu. If the cpu is offline, then + * affinity will be set to the nearest peer cpu within the numa node that is + * online. If there are no online cpus within the numa node, affinity is not + * assigned and the OS may do as it pleases. 
Note: cpu vector affinity mapping + * is consistent with the way cpu online/offline is handled when cfg_irq_numa is + * configured. + * + * If numa mode is not enabled and there is more than 1 vector allocated, then + * the driver relies on the managed irq interface where the OS assigns vector to + * cpu affinity. The driver will then use that affinity mapping to setup its + * cpu mapping table. * * Return codes * 0 - successful @@ -10978,13 +11389,31 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba) { int vectors, rc, index; char *name; + const struct cpumask *numa_mask = NULL; + unsigned int cpu = 0, cpu_cnt = 0, cpu_select = nr_cpu_ids; + struct lpfc_hba_eq_hdl *eqhdl; + const struct cpumask *maskp; + bool first; + unsigned int flags = PCI_IRQ_MSIX; /* Set up MSI-X multi-message vectors */ vectors = phba->cfg_irq_chann; - rc = pci_alloc_irq_vectors(phba->pcidev, - 1, - vectors, PCI_IRQ_MSIX | PCI_IRQ_AFFINITY); + if (phba->cfg_irq_numa) { + numa_mask = &phba->sli4_hba.numa_mask; + cpu_cnt = cpumask_weight(numa_mask); + vectors = min(phba->cfg_irq_chann, cpu_cnt); + + /* cpu: iterates over numa_mask including offline or online + * cpu_select: iterates over online numa_mask to set affinity + */ + cpu = cpumask_first(numa_mask); + cpu_select = lpfc_next_online_numa_cpu(numa_mask, cpu); + } else { + flags |= PCI_IRQ_AFFINITY; + } + + rc = pci_alloc_irq_vectors(phba->pcidev, 1, vectors, flags); if (rc < 0) { lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "0484 PCI enable MSI-X failed (%d)\n", rc); @@ -10994,23 +11423,61 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba) /* Assign MSI-X vectors to interrupt handlers */ for (index = 0; index < vectors; index++) { - name = phba->sli4_hba.hba_eq_hdl[index].handler_name; + eqhdl = lpfc_get_eq_hdl(index); + name = eqhdl->handler_name; memset(name, 0, LPFC_SLI4_HANDLER_NAME_SZ); snprintf(name, LPFC_SLI4_HANDLER_NAME_SZ, LPFC_DRIVER_HANDLER_NAME"%d", index); - phba->sli4_hba.hba_eq_hdl[index].idx = index; - 
phba->sli4_hba.hba_eq_hdl[index].phba = phba; + eqhdl->idx = index; rc = request_irq(pci_irq_vector(phba->pcidev, index), &lpfc_sli4_hba_intr_handler, 0, - name, - &phba->sli4_hba.hba_eq_hdl[index]); + name, eqhdl); if (rc) { lpfc_printf_log(phba, KERN_WARNING, LOG_INIT, "0486 MSI-X fast-path (%d) " "request_irq failed (%d)\n", index, rc); goto cfg_fail_out; } + + eqhdl->irq = pci_irq_vector(phba->pcidev, index); + + if (phba->cfg_irq_numa) { + /* If found a neighboring online cpu, set affinity */ + if (cpu_select < nr_cpu_ids) + lpfc_irq_set_aff(eqhdl, cpu_select); + + /* Assign EQ to cpu_map */ + lpfc_assign_eq_map_info(phba, index, + LPFC_CPU_FIRST_IRQ, + cpu); + + /* Iterate to next offline or online cpu in numa_mask */ + cpu = cpumask_next(cpu, numa_mask); + + /* Find next online cpu in numa_mask to set affinity */ + cpu_select = lpfc_next_online_numa_cpu(numa_mask, cpu); + } else if (vectors == 1) { + cpu = cpumask_first(cpu_present_mask); + lpfc_assign_eq_map_info(phba, index, LPFC_CPU_FIRST_IRQ, + cpu); + } else { + maskp = pci_irq_get_affinity(phba->pcidev, index); + + first = true; + /* Loop through all CPUs associated with vector index */ + for_each_cpu_and(cpu, maskp, cpu_present_mask) { + /* If this is the first CPU thats assigned to + * this vector, set LPFC_CPU_FIRST_IRQ. + */ + lpfc_assign_eq_map_info(phba, index, + first ? 
+ LPFC_CPU_FIRST_IRQ : 0, + cpu); + if (first) + first = false; + } + } } if (vectors != phba->cfg_irq_chann) { @@ -11020,17 +11487,18 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba) phba->cfg_irq_chann, vectors); if (phba->cfg_irq_chann > vectors) phba->cfg_irq_chann = vectors; - if (phba->nvmet_support && (phba->cfg_nvmet_mrq > vectors)) - phba->cfg_nvmet_mrq = vectors; } return rc; cfg_fail_out: /* free the irq already requested */ - for (--index; index >= 0; index--) - free_irq(pci_irq_vector(phba->pcidev, index), - &phba->sli4_hba.hba_eq_hdl[index]); + for (--index; index >= 0; index--) { + eqhdl = lpfc_get_eq_hdl(index); + lpfc_irq_clear_aff(eqhdl); + irq_set_affinity_hint(eqhdl->irq, NULL); + free_irq(eqhdl->irq, eqhdl); + } /* Unconfigure MSI-X capability structure */ pci_free_irq_vectors(phba->pcidev); @@ -11057,6 +11525,8 @@ static int lpfc_sli4_enable_msi(struct lpfc_hba *phba) { int rc, index; + unsigned int cpu; + struct lpfc_hba_eq_hdl *eqhdl; rc = pci_alloc_irq_vectors(phba->pcidev, 1, 1, PCI_IRQ_MSI | PCI_IRQ_AFFINITY); @@ -11078,9 +11548,15 @@ lpfc_sli4_enable_msi(struct lpfc_hba *phba) return rc; } + eqhdl = lpfc_get_eq_hdl(0); + eqhdl->irq = pci_irq_vector(phba->pcidev, 0); + + cpu = cpumask_first(cpu_present_mask); + lpfc_assign_eq_map_info(phba, 0, LPFC_CPU_FIRST_IRQ, cpu); + for (index = 0; index < phba->cfg_irq_chann; index++) { - phba->sli4_hba.hba_eq_hdl[index].idx = index; - phba->sli4_hba.hba_eq_hdl[index].phba = phba; + eqhdl = lpfc_get_eq_hdl(index); + eqhdl->idx = index; } return 0; @@ -11138,15 +11614,21 @@ lpfc_sli4_enable_intr(struct lpfc_hba *phba, uint32_t cfg_mode) IRQF_SHARED, LPFC_DRIVER_NAME, phba); if (!retval) { struct lpfc_hba_eq_hdl *eqhdl; + unsigned int cpu; /* Indicate initialization to INTx mode */ phba->intr_type = INTx; intr_mode = 0; + eqhdl = lpfc_get_eq_hdl(0); + eqhdl->irq = pci_irq_vector(phba->pcidev, 0); + + cpu = cpumask_first(cpu_present_mask); + lpfc_assign_eq_map_info(phba, 0, LPFC_CPU_FIRST_IRQ, + cpu); 
for (idx = 0; idx < phba->cfg_irq_chann; idx++) { - eqhdl = &phba->sli4_hba.hba_eq_hdl[idx]; + eqhdl = lpfc_get_eq_hdl(idx); eqhdl->idx = idx; - eqhdl->phba = phba; } } } @@ -11168,14 +11650,14 @@ lpfc_sli4_disable_intr(struct lpfc_hba *phba) /* Disable the currently initialized interrupt mode */ if (phba->intr_type == MSIX) { int index; + struct lpfc_hba_eq_hdl *eqhdl; /* Free up MSI-X multi-message vectors */ for (index = 0; index < phba->cfg_irq_chann; index++) { - irq_set_affinity_hint( - pci_irq_vector(phba->pcidev, index), - NULL); - free_irq(pci_irq_vector(phba->pcidev, index), - &phba->sli4_hba.hba_eq_hdl[index]); + eqhdl = lpfc_get_eq_hdl(index); + lpfc_irq_clear_aff(eqhdl); + irq_set_affinity_hint(eqhdl->irq, NULL); + free_irq(eqhdl->irq, eqhdl); } } else { free_irq(phba->pcidev->irq, phba); @@ -11367,6 +11849,9 @@ lpfc_sli4_hba_unset(struct lpfc_hba *phba) /* Wait for completion of device XRI exchange busy */ lpfc_sli4_xri_exchange_busy_wait(phba); + /* per-phba callback de-registration for hotplug event */ + lpfc_cpuhp_remove(phba); + /* Disable PCI subsystem interrupt */ lpfc_sli4_disable_intr(phba); @@ -11538,6 +12023,7 @@ lpfc_get_sli4_parameters(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq) sli4_params->cqav = bf_get(cfg_cqav, mbx_sli4_parameters); sli4_params->wqsize = bf_get(cfg_wqsize, mbx_sli4_parameters); sli4_params->bv1s = bf_get(cfg_bv1s, mbx_sli4_parameters); + sli4_params->pls = bf_get(cfg_pvl, mbx_sli4_parameters); sli4_params->sgl_pages_max = bf_get(cfg_sgl_page_cnt, mbx_sli4_parameters); sli4_params->wqpcnt = bf_get(cfg_wqpcnt, mbx_sli4_parameters); @@ -11589,13 +12075,10 @@ fcponly: } /* If the NVME FC4 type is enabled, scale the sg_seg_cnt to - * accommodate 512K and 1M IOs in a single nvme buf and supply - * enough NVME LS iocb buffers for larger connectivity counts. + * accommodate 512K and 1M IOs in a single nvme buf. 
*/ - if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) phba->cfg_sg_seg_cnt = LPFC_MAX_NVME_SEG_CNT; - phba->cfg_iocb_cnt = 5; - } /* Only embed PBDE for if_type 6, PBDE support requires xib be set */ if ((bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) != @@ -12312,35 +12795,57 @@ lpfc_sli4_get_iocb_cnt(struct lpfc_hba *phba) } -static void +static int lpfc_log_write_firmware_error(struct lpfc_hba *phba, uint32_t offset, uint32_t magic_number, uint32_t ftype, uint32_t fid, uint32_t fsize, const struct firmware *fw) { - if ((offset == ADD_STATUS_FW_NOT_SUPPORTED) || + int rc; + + /* Three cases: (1) FW was not supported on the detected adapter. + * (2) FW update has been locked out administratively. + * (3) Some other error during FW update. + * In each case, an unmaskable message is written to the console + * for admin diagnosis. + */ + if (offset == ADD_STATUS_FW_NOT_SUPPORTED || (phba->pcidev->device == PCI_DEVICE_ID_LANCER_G6_FC && - magic_number != MAGIC_NUMER_G6) || + magic_number != MAGIC_NUMBER_G6) || (phba->pcidev->device == PCI_DEVICE_ID_LANCER_G7_FC && - magic_number != MAGIC_NUMER_G7)) + magic_number != MAGIC_NUMBER_G7)) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "3030 This firmware version is not supported on " - "this HBA model. Device:%x Magic:%x Type:%x " - "ID:%x Size %d %zd\n", - phba->pcidev->device, magic_number, ftype, fid, - fsize, fw->size); - else + "3030 This firmware version is not supported on" + " this HBA model. Device:%x Magic:%x Type:%x " + "ID:%x Size %d %zd\n", + phba->pcidev->device, magic_number, ftype, fid, + fsize, fw->size); + rc = -EINVAL; + } else if (offset == ADD_STATUS_FW_DOWNLOAD_HW_DISABLED) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "3022 FW Download failed. 
Device:%x Magic:%x Type:%x " - "ID:%x Size %d %zd\n", - phba->pcidev->device, magic_number, ftype, fid, - fsize, fw->size); + "3021 Firmware downloads have been prohibited " + "by a system configuration setting on " + "Device:%x Magic:%x Type:%x ID:%x Size %d " + "%zd\n", + phba->pcidev->device, magic_number, ftype, fid, + fsize, fw->size); + rc = -EACCES; + } else { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3022 FW Download failed. Add Status x%x " + "Device:%x Magic:%x Type:%x ID:%x Size %d " + "%zd\n", + offset, phba->pcidev->device, magic_number, + ftype, fid, fsize, fw->size); + rc = -EIO; + } + return rc; } - /** * lpfc_write_firmware - attempt to write a firmware image to the port * @fw: pointer to firmware image returned from request_firmware. - * @phba: pointer to lpfc hba data structure. + * @context: pointer to firmware image returned from request_firmware. + * @ret: return value this routine provides to the caller. * **/ static void @@ -12409,8 +12914,12 @@ lpfc_write_firmware(const struct firmware *fw, void *context) rc = lpfc_wr_object(phba, &dma_buffer_list, (fw->size - offset), &offset); if (rc) { - lpfc_log_write_firmware_error(phba, offset, - magic_number, ftype, fid, fsize, fw); + rc = lpfc_log_write_firmware_error(phba, offset, + magic_number, + ftype, + fid, + fsize, + fw); goto release_out; } } @@ -12430,9 +12939,12 @@ release_out: } release_firmware(fw); out: - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "3024 Firmware update done: %d.\n", rc); - return; + if (rc < 0) + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3062 Firmware update error, status %d.\n", rc); + else + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3024 Firmware update success: size %d.\n", rc); } /** @@ -12551,6 +13063,12 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) phba->pport = NULL; lpfc_stop_port(phba); + /* Init cpu_map array */ + lpfc_cpu_map_array_init(phba); + + /* Init hba_eq_hdl array */ + lpfc_hba_eq_hdl_array_init(phba); + 
/* Configure and enable interrupt */ intr_mode = lpfc_sli4_enable_intr(phba, cfg_mode); if (intr_mode == LPFC_INTR_ERROR) { @@ -12632,6 +13150,9 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) /* Enable RAS FW log support */ lpfc_sli4_ras_setup(phba); + INIT_LIST_HEAD(&phba->poll_list); + cpuhp_state_add_instance_nocalls(lpfc_cpuhp_state, &phba->cpuhp); + return 0; out_free_sysfs_attr: @@ -13344,8 +13865,7 @@ lpfc_sli4_oas_verify(struct lpfc_hba *phba) phba->cfg_fof = 1; } else { phba->cfg_fof = 0; - if (phba->device_data_mem_pool) - mempool_destroy(phba->device_data_mem_pool); + mempool_destroy(phba->device_data_mem_pool); phba->device_data_mem_pool = NULL; } @@ -13450,11 +13970,24 @@ lpfc_init(void) /* Initialize in case vector mapping is needed */ lpfc_present_cpu = num_present_cpus(); + error = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, + "lpfc/sli4:online", + lpfc_cpu_online, lpfc_cpu_offline); + if (error < 0) + goto cpuhp_failure; + lpfc_cpuhp_state = error; + error = pci_register_driver(&lpfc_driver); - if (error) { - fc_release_transport(lpfc_transport_template); - fc_release_transport(lpfc_vport_transport_template); - } + if (error) + goto unwind; + + return error; + +unwind: + cpuhp_remove_multi_state(lpfc_cpuhp_state); +cpuhp_failure: + fc_release_transport(lpfc_transport_template); + fc_release_transport(lpfc_vport_transport_template); return error; } @@ -13471,6 +14004,7 @@ lpfc_exit(void) { misc_deregister(&lpfc_mgmt_dev); pci_unregister_driver(&lpfc_driver); + cpuhp_remove_multi_state(lpfc_cpuhp_state); fc_release_transport(lpfc_transport_template); fc_release_transport(lpfc_vport_transport_template); idr_destroy(&lpfc_hba_index); |