Diffstat (limited to 'drivers/scsi/lpfc')
-rw-r--r--  drivers/scsi/lpfc/lpfc_attr.c      34
-rw-r--r--  drivers/scsi/lpfc/lpfc_bsg.c        2
-rw-r--r--  drivers/scsi/lpfc/lpfc_crtn.h       3
-rw-r--r--  drivers/scsi/lpfc/lpfc_ct.c        14
-rw-r--r--  drivers/scsi/lpfc/lpfc_els.c        1
-rw-r--r--  drivers/scsi/lpfc/lpfc_init.c     512
-rw-r--r--  drivers/scsi/lpfc/lpfc_nvme.c      16
-rw-r--r--  drivers/scsi/lpfc/lpfc_nvmet.c    332
-rw-r--r--  drivers/scsi/lpfc/lpfc_nvmet.h      1
-rw-r--r--  drivers/scsi/lpfc/lpfc_scsi.c      16
-rw-r--r--  drivers/scsi/lpfc/lpfc_sli.c       76
-rw-r--r--  drivers/scsi/lpfc/lpfc_sli4.h      11
-rw-r--r--  drivers/scsi/lpfc/lpfc_version.h    2
13 files changed, 794 insertions, 226 deletions
diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 2bd1e014103b..ea62322ffe2b 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -4097,9 +4097,9 @@ lpfc_topology_store(struct device *dev, struct device_attribute *attr, } if ((phba->pcidev->device == PCI_DEVICE_ID_LANCER_G6_FC || phba->pcidev->device == PCI_DEVICE_ID_LANCER_G7_FC) && - val != FLAGS_TOPOLOGY_MODE_PT_PT) { + val == 4) { lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT, - "3114 Only non-FC-AL mode is supported\n"); + "3114 Loop mode not supported\n"); return -EINVAL; } phba->cfg_topology = val; @@ -5180,7 +5180,8 @@ lpfc_cq_max_proc_limit_store(struct device *dev, struct device_attribute *attr, /* set the values on the cq's */ for (i = 0; i < phba->cfg_irq_chann; i++) { - eq = phba->sli4_hba.hdwq[i].hba_eq; + /* Get the EQ corresponding to the IRQ vector */ + eq = phba->sli4_hba.hba_eq_hdl[i].eq; if (!eq) continue; @@ -5301,35 +5302,44 @@ lpfc_fcp_cpu_map_show(struct device *dev, struct device_attribute *attr, len += scnprintf( buf + len, PAGE_SIZE - len, "CPU %02d hdwq None " - "physid %d coreid %d ht %d\n", + "physid %d coreid %d ht %d ua %d\n", phba->sli4_hba.curr_disp_cpu, - cpup->phys_id, - cpup->core_id, cpup->hyper); + cpup->phys_id, cpup->core_id, + (cpup->flag & LPFC_CPU_MAP_HYPER), + (cpup->flag & LPFC_CPU_MAP_UNASSIGN)); else len += scnprintf( buf + len, PAGE_SIZE - len, "CPU %02d EQ %04d hdwq %04d " - "physid %d coreid %d ht %d\n", + "physid %d coreid %d ht %d ua %d\n", phba->sli4_hba.curr_disp_cpu, cpup->eq, cpup->hdwq, cpup->phys_id, - cpup->core_id, cpup->hyper); + cpup->core_id, + (cpup->flag & LPFC_CPU_MAP_HYPER), + (cpup->flag & LPFC_CPU_MAP_UNASSIGN)); } else { if (cpup->hdwq == LPFC_VECTOR_MAP_EMPTY) len += scnprintf( buf + len, PAGE_SIZE - len, "CPU %02d hdwq None " - "physid %d coreid %d ht %d IRQ %d\n", + "physid %d coreid %d ht %d ua %d IRQ %d\n", phba->sli4_hba.curr_disp_cpu, cpup->phys_id, - cpup->core_id, cpup->hyper, cpup->irq); + cpup->core_id, + (cpup->flag & LPFC_CPU_MAP_HYPER), + (cpup->flag & LPFC_CPU_MAP_UNASSIGN), + cpup->irq); else len += scnprintf( buf + len, PAGE_SIZE - len, "CPU %02d EQ %04d hdwq %04d " - "physid %d coreid %d ht %d IRQ %d\n", + "physid %d coreid %d ht %d ua %d IRQ %d\n", phba->sli4_hba.curr_disp_cpu, cpup->eq, cpup->hdwq, cpup->phys_id, - cpup->core_id, cpup->hyper, cpup->irq); + cpup->core_id, + (cpup->flag & LPFC_CPU_MAP_HYPER), + (cpup->flag & LPFC_CPU_MAP_UNASSIGN), + cpup->irq); } phba->sli4_hba.curr_disp_cpu++; diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c index b0202bc0aa62..b7216d694bff 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.c +++ b/drivers/scsi/lpfc/lpfc_bsg.c @@ -5741,7 +5741,7 @@ lpfc_get_trunk_info(struct bsg_job *job) event_reply->port_speed = phba->sli4_hba.link_state.speed / 1000; event_reply->logical_speed = - phba->sli4_hba.link_state.logical_speed / 100; + phba->sli4_hba.link_state.logical_speed / 1000; job_error: bsg_reply->result = rc; bsg_job_done(job, bsg_reply->result, diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index 866374801140..68e9f96242d3 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -572,7 +572,8 @@ void lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba); void lpfc_nvmet_unsol_ls_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, struct lpfc_iocbq *piocb); void lpfc_nvmet_unsol_fcp_event(struct lpfc_hba *phba, uint32_t idx, - struct rqb_dmabuf *nvmebuf, uint64_t isr_ts); + struct 
rqb_dmabuf *nvmebuf, uint64_t isr_ts, + uint8_t cqflag); void lpfc_nvme_mod_param_dep(struct lpfc_hba *phba); void lpfc_nvme_abort_fcreq_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index 4812bbbf43cc..ec72c39997d2 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -2358,6 +2358,7 @@ static int lpfc_fdmi_port_attr_fc4type(struct lpfc_vport *vport, struct lpfc_fdmi_attr_def *ad) { + struct lpfc_hba *phba = vport->phba; struct lpfc_fdmi_attr_entry *ae; uint32_t size; @@ -2366,9 +2367,13 @@ lpfc_fdmi_port_attr_fc4type(struct lpfc_vport *vport, ae->un.AttrTypes[3] = 0x02; /* Type 0x1 - ELS */ ae->un.AttrTypes[2] = 0x01; /* Type 0x8 - FCP */ - if (vport->nvmei_support || vport->phba->nvmet_support) - ae->un.AttrTypes[6] = 0x01; /* Type 0x28 - NVME */ ae->un.AttrTypes[7] = 0x01; /* Type 0x20 - CT */ + + /* Check to see if Firmware supports NVME and on physical port */ + if ((phba->sli_rev == LPFC_SLI_REV4) && (vport == phba->pport) && + phba->sli4_hba.pc_sli4_params.nvme) + ae->un.AttrTypes[6] = 0x01; /* Type 0x28 - NVME */ + size = FOURBYTES + 32; ad->AttrLen = cpu_to_be16(size); ad->AttrType = cpu_to_be16(RPRT_SUPPORTED_FC4_TYPES); @@ -2680,9 +2685,12 @@ lpfc_fdmi_port_attr_active_fc4type(struct lpfc_vport *vport, ae->un.AttrTypes[3] = 0x02; /* Type 0x1 - ELS */ ae->un.AttrTypes[2] = 0x01; /* Type 0x8 - FCP */ + ae->un.AttrTypes[7] = 0x01; /* Type 0x20 - CT */ + + /* Check to see if NVME is configured or not */ if (vport->phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) ae->un.AttrTypes[6] = 0x1; /* Type 0x28 - NVME */ - ae->un.AttrTypes[7] = 0x01; /* Type 0x20 - CT */ + size = FOURBYTES + 32; ad->AttrLen = cpu_to_be16(size); ad->AttrType = cpu_to_be16(RPRT_ACTIVE_FC4_TYPES); diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 968ed0fd37f7..f12780f4cfbb 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -4308,6 +4308,7 @@ lpfc_cmpl_els_rsp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, if ((rspiocb->iocb.ulpStatus == 0) && (ndlp->nlp_flag & NLP_ACC_REGLOGIN)) { if (!lpfc_unreg_rpi(vport, ndlp) && + (!(vport->fc_flag & FC_PT2PT)) && (ndlp->nlp_state == NLP_STE_PLOGI_ISSUE || ndlp->nlp_state == NLP_STE_REG_LOGIN_ISSUE)) { lpfc_printf_vlog(vport, KERN_INFO, diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index eaaef682de25..6d6b14295734 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -72,7 +72,7 @@ unsigned long _dump_buf_dif_order; spinlock_t _dump_buf_lock; /* Used when mapping IRQ vectors in a driver centric manner */ -uint32_t lpfc_present_cpu; +static uint32_t lpfc_present_cpu; static void lpfc_get_hba_model_desc(struct lpfc_hba *, uint8_t *, uint8_t *); static int lpfc_post_rcv_buf(struct lpfc_hba *); @@ -93,8 +93,8 @@ static void lpfc_sli4_cq_event_release_all(struct lpfc_hba *); static void lpfc_sli4_disable_intr(struct lpfc_hba *); static uint32_t lpfc_sli4_enable_intr(struct lpfc_hba *, uint32_t); static void lpfc_sli4_oas_verify(struct lpfc_hba *phba); -static uint16_t lpfc_find_eq_handle(struct lpfc_hba *, uint16_t); static uint16_t lpfc_find_cpu_handle(struct lpfc_hba *, uint16_t, int); +static void lpfc_setup_bg(struct lpfc_hba *, struct Scsi_Host *); static struct scsi_transport_template *lpfc_transport_template = NULL; static struct scsi_transport_template *lpfc_vport_transport_template = NULL; @@ -1274,8 +1274,10 @@ lpfc_hb_eq_delay_work(struct work_struct 
*work) if (!eqcnt) goto requeue; + /* Loop thru all IRQ vectors */ for (i = 0; i < phba->cfg_irq_chann; i++) { - eq = phba->sli4_hba.hdwq[i].hba_eq; + /* Get the EQ corresponding to the IRQ vector */ + eq = phba->sli4_hba.hba_eq_hdl[i].eq; if (eq && eqcnt[eq->last_cpu] < 2) eqcnt[eq->last_cpu]++; continue; @@ -4114,14 +4116,13 @@ lpfc_new_io_buf(struct lpfc_hba *phba, int num_to_alloc) * pci bus space for an I/O. The DMA buffer includes the * number of SGE's necessary to support the sg_tablesize. */ - lpfc_ncmd->data = dma_pool_alloc(phba->lpfc_sg_dma_buf_pool, - GFP_KERNEL, - &lpfc_ncmd->dma_handle); + lpfc_ncmd->data = dma_pool_zalloc(phba->lpfc_sg_dma_buf_pool, + GFP_KERNEL, + &lpfc_ncmd->dma_handle); if (!lpfc_ncmd->data) { kfree(lpfc_ncmd); break; } - memset(lpfc_ncmd->data, 0, phba->cfg_sg_dma_buf_size); /* * 4K Page alignment is CRITICAL to BlockGuard, double check @@ -4347,6 +4348,9 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev) timer_setup(&vport->delayed_disc_tmo, lpfc_delayed_disc_tmo, 0); + if (phba->sli3_options & LPFC_SLI3_BG_ENABLED) + lpfc_setup_bg(phba, shost); + error = scsi_add_host_with_dma(shost, dev, &phba->pcidev->dev); if (error) goto out_put_shost; @@ -5055,7 +5059,7 @@ lpfc_update_trunk_link_status(struct lpfc_hba *phba, bf_get(lpfc_acqe_fc_la_speed, acqe_fc)); phba->sli4_hba.link_state.logical_speed = - bf_get(lpfc_acqe_fc_la_llink_spd, acqe_fc); + bf_get(lpfc_acqe_fc_la_llink_spd, acqe_fc) * 10; /* We got FC link speed, convert to fc_linkspeed (READ_TOPOLOGY) */ phba->fc_linkspeed = lpfc_async_link_speed_to_read_top( @@ -5158,8 +5162,14 @@ lpfc_sli4_async_fc_evt(struct lpfc_hba *phba, struct lpfc_acqe_fc_la *acqe_fc) bf_get(lpfc_acqe_fc_la_port_number, acqe_fc); phba->sli4_hba.link_state.fault = bf_get(lpfc_acqe_link_fault, acqe_fc); - phba->sli4_hba.link_state.logical_speed = + + if (bf_get(lpfc_acqe_fc_la_att_type, acqe_fc) == + LPFC_FC_LA_TYPE_LINK_DOWN) + phba->sli4_hba.link_state.logical_speed = 0; + else if (!phba->sli4_hba.conf_trunk) + phba->sli4_hba.link_state.logical_speed = bf_get(lpfc_acqe_fc_la_llink_spd, acqe_fc) * 10; + lpfc_printf_log(phba, KERN_INFO, LOG_SLI, "2896 Async FC event - Speed:%dGBaud Topology:x%x " "LA Type:x%x Port Type:%d Port Number:%d Logical speed:" @@ -6551,6 +6561,8 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) spin_lock_init(&phba->sli4_hba.abts_nvmet_buf_list_lock); INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list); INIT_LIST_HEAD(&phba->sli4_hba.lpfc_nvmet_io_wait_list); + spin_lock_init(&phba->sli4_hba.t_active_list_lock); + INIT_LIST_HEAD(&phba->sli4_hba.t_active_ctx_list); } /* This abort list used by worker thread */ @@ -7660,8 +7672,6 @@ lpfc_post_init_setup(struct lpfc_hba *phba) */ shost = pci_get_drvdata(phba->pcidev); shost->can_queue = phba->cfg_hba_queue_depth - 10; - if (phba->sli3_options & LPFC_SLI3_BG_ENABLED) - lpfc_setup_bg(phba, shost); lpfc_host_attrib_init(shost); @@ -8740,8 +8750,10 @@ int lpfc_sli4_queue_create(struct lpfc_hba *phba) { struct lpfc_queue *qdesc; - int idx, eqidx, cpu; + int idx, cpu, eqcpu; struct lpfc_sli4_hdw_queue *qp; + struct lpfc_vector_map_info *cpup; + struct lpfc_vector_map_info *eqcpup; struct lpfc_eq_intr_info *eqi; /* @@ -8826,40 +8838,60 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) INIT_LIST_HEAD(&phba->sli4_hba.lpfc_wq_list); /* Create HBA Event Queues (EQs) */ - for (idx = 0; idx < phba->cfg_hdw_queue; idx++) { - /* determine EQ affinity */ - eqidx = lpfc_find_eq_handle(phba, idx); - cpu = lpfc_find_cpu_handle(phba, 
eqidx, LPFC_FIND_BY_EQ); - /* - * If there are more Hardware Queues than available - * EQs, multiple Hardware Queues may share a common EQ. + for_each_present_cpu(cpu) { + /* We only want to create 1 EQ per vector, even though + * multiple CPUs might be using that vector. so only + * selects the CPUs that are LPFC_CPU_FIRST_IRQ. */ - if (idx >= phba->cfg_irq_chann) { - /* Share an existing EQ */ - phba->sli4_hba.hdwq[idx].hba_eq = - phba->sli4_hba.hdwq[eqidx].hba_eq; + cpup = &phba->sli4_hba.cpu_map[cpu]; + if (!(cpup->flag & LPFC_CPU_FIRST_IRQ)) continue; - } - /* Create an EQ */ + + /* Get a ptr to the Hardware Queue associated with this CPU */ + qp = &phba->sli4_hba.hdwq[cpup->hdwq]; + + /* Allocate an EQ */ qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE, phba->sli4_hba.eq_esize, phba->sli4_hba.eq_ecount, cpu); if (!qdesc) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0497 Failed allocate EQ (%d)\n", idx); + "0497 Failed allocate EQ (%d)\n", + cpup->hdwq); goto out_error; } qdesc->qe_valid = 1; - qdesc->hdwq = idx; - - /* Save the CPU this EQ is affinitised to */ - qdesc->chann = cpu; - phba->sli4_hba.hdwq[idx].hba_eq = qdesc; + qdesc->hdwq = cpup->hdwq; + qdesc->chann = cpu; /* First CPU this EQ is affinitised to */ qdesc->last_cpu = qdesc->chann; + + /* Save the allocated EQ in the Hardware Queue */ + qp->hba_eq = qdesc; + eqi = per_cpu_ptr(phba->sli4_hba.eq_info, qdesc->last_cpu); list_add(&qdesc->cpu_list, &eqi->list); } + /* Now we need to populate the other Hardware Queues, that share + * an IRQ vector, with the associated EQ ptr. + */ + for_each_present_cpu(cpu) { + cpup = &phba->sli4_hba.cpu_map[cpu]; + + /* Check for EQ already allocated in previous loop */ + if (cpup->flag & LPFC_CPU_FIRST_IRQ) + continue; + + /* Check for multiple CPUs per hdwq */ + qp = &phba->sli4_hba.hdwq[cpup->hdwq]; + if (qp->hba_eq) + continue; + + /* We need to share an EQ for this hdwq */ + eqcpu = lpfc_find_cpu_handle(phba, cpup->eq, LPFC_FIND_BY_EQ); + eqcpup = &phba->sli4_hba.cpu_map[eqcpu]; + qp->hba_eq = phba->sli4_hba.hdwq[eqcpup->hdwq].hba_eq; + } /* Allocate SCSI SLI4 CQ/WQs */ for (idx = 0; idx < phba->cfg_hdw_queue; idx++) { @@ -9122,23 +9154,31 @@ static inline void lpfc_sli4_release_hdwq(struct lpfc_hba *phba) { struct lpfc_sli4_hdw_queue *hdwq; + struct lpfc_queue *eq; uint32_t idx; hdwq = phba->sli4_hba.hdwq; - for (idx = 0; idx < phba->cfg_hdw_queue; idx++) { - if (idx < phba->cfg_irq_chann) - lpfc_sli4_queue_free(hdwq[idx].hba_eq); - hdwq[idx].hba_eq = NULL; + /* Loop thru all Hardware Queues */ + for (idx = 0; idx < phba->cfg_hdw_queue; idx++) { + /* Free the CQ/WQ corresponding to the Hardware Queue */ lpfc_sli4_queue_free(hdwq[idx].fcp_cq); lpfc_sli4_queue_free(hdwq[idx].nvme_cq); lpfc_sli4_queue_free(hdwq[idx].fcp_wq); lpfc_sli4_queue_free(hdwq[idx].nvme_wq); + hdwq[idx].hba_eq = NULL; hdwq[idx].fcp_cq = NULL; hdwq[idx].nvme_cq = NULL; hdwq[idx].fcp_wq = NULL; hdwq[idx].nvme_wq = NULL; } + /* Loop thru all IRQ vectors */ + for (idx = 0; idx < phba->cfg_irq_chann; idx++) { + /* Free the EQ corresponding to the IRQ vector */ + eq = phba->sli4_hba.hba_eq_hdl[idx].eq; + lpfc_sli4_queue_free(eq); + phba->sli4_hba.hba_eq_hdl[idx].eq = NULL; + } } /** @@ -9316,16 +9356,17 @@ static void lpfc_setup_cq_lookup(struct lpfc_hba *phba) { struct lpfc_queue *eq, *childq; - struct lpfc_sli4_hdw_queue *qp; int qidx; - qp = phba->sli4_hba.hdwq; memset(phba->sli4_hba.cq_lookup, 0, (sizeof(struct lpfc_queue *) * (phba->sli4_hba.cq_max + 1))); + /* Loop thru all IRQ vectors */ for (qidx = 
0; qidx < phba->cfg_irq_chann; qidx++) { - eq = qp[qidx].hba_eq; + /* Get the EQ corresponding to the IRQ vector */ + eq = phba->sli4_hba.hba_eq_hdl[qidx].eq; if (!eq) continue; + /* Loop through all CQs associated with that EQ */ list_for_each_entry(childq, &eq->child_list, list) { if (childq->queue_id > phba->sli4_hba.cq_max) continue; @@ -9354,9 +9395,10 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba) { uint32_t shdr_status, shdr_add_status; union lpfc_sli4_cfg_shdr *shdr; + struct lpfc_vector_map_info *cpup; struct lpfc_sli4_hdw_queue *qp; LPFC_MBOXQ_t *mboxq; - int qidx; + int qidx, cpu; uint32_t length, usdelay; int rc = -ENOMEM; @@ -9417,32 +9459,55 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba) rc = -ENOMEM; goto out_error; } + + /* Loop thru all IRQ vectors */ for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) { - if (!qp[qidx].hba_eq) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0522 Fast-path EQ (%d) not " - "allocated\n", qidx); - rc = -ENOMEM; - goto out_destroy; - } - rc = lpfc_eq_create(phba, qp[qidx].hba_eq, - phba->cfg_fcp_imax); - if (rc) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0523 Failed setup of fast-path EQ " - "(%d), rc = 0x%x\n", qidx, - (uint32_t)rc); - goto out_destroy; + /* Create HBA Event Queues (EQs) in order */ + for_each_present_cpu(cpu) { + cpup = &phba->sli4_hba.cpu_map[cpu]; + + /* Look for the CPU thats using that vector with + * LPFC_CPU_FIRST_IRQ set. + */ + if (!(cpup->flag & LPFC_CPU_FIRST_IRQ)) + continue; + if (qidx != cpup->eq) + continue; + + /* Create an EQ for that vector */ + rc = lpfc_eq_create(phba, qp[cpup->hdwq].hba_eq, + phba->cfg_fcp_imax); + if (rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "0523 Failed setup of fast-path" + " EQ (%d), rc = 0x%x\n", + cpup->eq, (uint32_t)rc); + goto out_destroy; + } + + /* Save the EQ for that vector in the hba_eq_hdl */ + phba->sli4_hba.hba_eq_hdl[cpup->eq].eq = + qp[cpup->hdwq].hba_eq; + + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "2584 HBA EQ setup: queue[%d]-id=%d\n", + cpup->eq, + qp[cpup->hdwq].hba_eq->queue_id); } - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "2584 HBA EQ setup: queue[%d]-id=%d\n", qidx, - qp[qidx].hba_eq->queue_id); } + /* Loop thru all Hardware Queues */ if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) { + cpu = lpfc_find_cpu_handle(phba, qidx, + LPFC_FIND_BY_HDWQ); + cpup = &phba->sli4_hba.cpu_map[cpu]; + + /* Create the CQ/WQ corresponding to the + * Hardware Queue + */ rc = lpfc_create_wq_cq(phba, - qp[qidx].hba_eq, + phba->sli4_hba.hdwq[cpup->hdwq].hba_eq, qp[qidx].nvme_cq, qp[qidx].nvme_wq, &phba->sli4_hba.hdwq[qidx].nvme_cq_map, @@ -9458,8 +9523,12 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba) } for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) { + cpu = lpfc_find_cpu_handle(phba, qidx, LPFC_FIND_BY_HDWQ); + cpup = &phba->sli4_hba.cpu_map[cpu]; + + /* Create the CQ/WQ corresponding to the Hardware Queue */ rc = lpfc_create_wq_cq(phba, - qp[qidx].hba_eq, + phba->sli4_hba.hdwq[cpup->hdwq].hba_eq, qp[qidx].fcp_cq, qp[qidx].fcp_wq, &phba->sli4_hba.hdwq[qidx].fcp_cq_map, @@ -9711,6 +9780,7 @@ void lpfc_sli4_queue_unset(struct lpfc_hba *phba) { struct lpfc_sli4_hdw_queue *qp; + struct lpfc_queue *eq; int qidx; /* Unset mailbox command work queue */ @@ -9762,14 +9832,20 @@ lpfc_sli4_queue_unset(struct lpfc_hba *phba) /* Unset fast-path SLI4 queues */ if (phba->sli4_hba.hdwq) { + /* Loop thru all Hardware Queues */ for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) { + /* Destroy the CQ/WQ 
corresponding to Hardware Queue */ qp = &phba->sli4_hba.hdwq[qidx]; lpfc_wq_destroy(phba, qp->fcp_wq); lpfc_wq_destroy(phba, qp->nvme_wq); lpfc_cq_destroy(phba, qp->fcp_cq); lpfc_cq_destroy(phba, qp->nvme_cq); - if (qidx < phba->cfg_irq_chann) - lpfc_eq_destroy(phba, qp->hba_eq); + } + /* Loop thru all IRQ vectors */ + for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) { + /* Destroy the EQ corresponding to the IRQ vector */ + eq = phba->sli4_hba.hba_eq_hdl[qidx].eq; + lpfc_eq_destroy(phba, eq); } } @@ -10559,11 +10635,12 @@ lpfc_sli_disable_intr(struct lpfc_hba *phba) } /** - * lpfc_find_cpu_handle - Find the CPU that corresponds to the specified EQ + * lpfc_find_cpu_handle - Find the CPU that corresponds to the specified Queue * @phba: pointer to lpfc hba data structure. * @id: EQ vector index or Hardware Queue index * @match: LPFC_FIND_BY_EQ = match by EQ * LPFC_FIND_BY_HDWQ = match by Hardware Queue + * Return the CPU that matches the selection criteria */ static uint16_t lpfc_find_cpu_handle(struct lpfc_hba *phba, uint16_t id, int match) @@ -10571,40 +10648,27 @@ lpfc_find_cpu_handle(struct lpfc_hba *phba, uint16_t id, int match) struct lpfc_vector_map_info *cpup; int cpu; - /* Find the desired phys_id for the specified EQ */ + /* Loop through all CPUs */ for_each_present_cpu(cpu) { cpup = &phba->sli4_hba.cpu_map[cpu]; + + /* If we are matching by EQ, there may be multiple CPUs using + * using the same vector, so select the one with + * LPFC_CPU_FIRST_IRQ set. + */ if ((match == LPFC_FIND_BY_EQ) && + (cpup->flag & LPFC_CPU_FIRST_IRQ) && (cpup->irq != LPFC_VECTOR_MAP_EMPTY) && (cpup->eq == id)) return cpu; + + /* If matching by HDWQ, select the first CPU that matches */ if ((match == LPFC_FIND_BY_HDWQ) && (cpup->hdwq == id)) return cpu; } return 0; } -/** - * lpfc_find_eq_handle - Find the EQ that corresponds to the specified - * Hardware Queue - * @phba: pointer to lpfc hba data structure. 
- * @hdwq: Hardware Queue index - */ -static uint16_t -lpfc_find_eq_handle(struct lpfc_hba *phba, uint16_t hdwq) -{ - struct lpfc_vector_map_info *cpup; - int cpu; - - /* Find the desired phys_id for the specified EQ */ - for_each_present_cpu(cpu) { - cpup = &phba->sli4_hba.cpu_map[cpu]; - if (cpup->hdwq == hdwq) - return cpup->eq; - } - return 0; -} - #ifdef CONFIG_X86 /** * lpfc_find_hyper - Determine if the CPU map entry is hyper-threaded @@ -10645,24 +10709,31 @@ lpfc_find_hyper(struct lpfc_hba *phba, int cpu, static void lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors) { - int i, cpu, idx; + int i, cpu, idx, new_cpu, start_cpu, first_cpu; int max_phys_id, min_phys_id; int max_core_id, min_core_id; struct lpfc_vector_map_info *cpup; + struct lpfc_vector_map_info *new_cpup; const struct cpumask *maskp; #ifdef CONFIG_X86 struct cpuinfo_x86 *cpuinfo; #endif /* Init cpu_map array */ - memset(phba->sli4_hba.cpu_map, 0xff, - (sizeof(struct lpfc_vector_map_info) * - phba->sli4_hba.num_possible_cpu)); + for_each_possible_cpu(cpu) { + cpup = &phba->sli4_hba.cpu_map[cpu]; + cpup->phys_id = LPFC_VECTOR_MAP_EMPTY; + cpup->core_id = LPFC_VECTOR_MAP_EMPTY; + cpup->hdwq = LPFC_VECTOR_MAP_EMPTY; + cpup->eq = LPFC_VECTOR_MAP_EMPTY; + cpup->irq = LPFC_VECTOR_MAP_EMPTY; + cpup->flag = 0; + } max_phys_id = 0; - min_phys_id = 0xffff; + min_phys_id = LPFC_VECTOR_MAP_EMPTY; max_core_id = 0; - min_core_id = 0xffff; + min_core_id = LPFC_VECTOR_MAP_EMPTY; /* Update CPU map with physical id and core id of each CPU */ for_each_present_cpu(cpu) { @@ -10671,13 +10742,12 @@ lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors) cpuinfo = &cpu_data(cpu); cpup->phys_id = cpuinfo->phys_proc_id; cpup->core_id = cpuinfo->cpu_core_id; - cpup->hyper = lpfc_find_hyper(phba, cpu, - cpup->phys_id, cpup->core_id); + if (lpfc_find_hyper(phba, cpu, cpup->phys_id, cpup->core_id)) + cpup->flag |= LPFC_CPU_MAP_HYPER; #else /* No distinction between CPUs for other platforms */ cpup->phys_id = 0; cpup->core_id = cpu; - cpup->hyper = 0; #endif lpfc_printf_log(phba, KERN_INFO, LOG_INIT, @@ -10703,23 +10773,216 @@ lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors) eqi->icnt = 0; } + /* This loop sets up all CPUs that are affinitized with a + * irq vector assigned to the driver. All affinitized CPUs + * will get a link to that vectors IRQ and EQ. + */ for (idx = 0; idx < phba->cfg_irq_chann; idx++) { + /* Get a CPU mask for all CPUs affinitized to this vector */ maskp = pci_irq_get_affinity(phba->pcidev, idx); if (!maskp) continue; + i = 0; + /* Loop through all CPUs associated with vector idx */ for_each_cpu_and(cpu, maskp, cpu_present_mask) { + /* Set the EQ index and IRQ for that vector */ cpup = &phba->sli4_hba.cpu_map[cpu]; cpup->eq = idx; - cpup->hdwq = idx; cpup->irq = pci_irq_vector(phba->pcidev, idx); - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "3336 Set Affinity: CPU %d " - "hdwq %d irq %d\n", - cpu, cpup->hdwq, cpup->irq); + "irq %d eq %d\n", + cpu, cpup->irq, cpup->eq); + + /* If this is the first CPU thats assigned to this + * vector, set LPFC_CPU_FIRST_IRQ. + */ + if (!i) + cpup->flag |= LPFC_CPU_FIRST_IRQ; + i++; } } + + /* After looking at each irq vector assigned to this pcidev, its + * possible to see that not ALL CPUs have been accounted for. + * Next we will set any unassigned (unaffinitized) cpu map + * entries to a IRQ on the same phys_id. 
+ */ + first_cpu = cpumask_first(cpu_present_mask); + start_cpu = first_cpu; + + for_each_present_cpu(cpu) { + cpup = &phba->sli4_hba.cpu_map[cpu]; + + /* Is this CPU entry unassigned */ + if (cpup->eq == LPFC_VECTOR_MAP_EMPTY) { + /* Mark CPU as IRQ not assigned by the kernel */ + cpup->flag |= LPFC_CPU_MAP_UNASSIGN; + + /* If so, find a new_cpup thats on the the SAME + * phys_id as cpup. start_cpu will start where we + * left off so all unassigned entries don't get assgined + * the IRQ of the first entry. + */ + new_cpu = start_cpu; + for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) { + new_cpup = &phba->sli4_hba.cpu_map[new_cpu]; + if (!(new_cpup->flag & LPFC_CPU_MAP_UNASSIGN) && + (new_cpup->irq != LPFC_VECTOR_MAP_EMPTY) && + (new_cpup->phys_id == cpup->phys_id)) + goto found_same; + new_cpu = cpumask_next( + new_cpu, cpu_present_mask); + if (new_cpu == nr_cpumask_bits) + new_cpu = first_cpu; + } + /* At this point, we leave the CPU as unassigned */ + continue; +found_same: + /* We found a matching phys_id, so copy the IRQ info */ + cpup->eq = new_cpup->eq; + cpup->irq = new_cpup->irq; + + /* Bump start_cpu to the next slot to minmize the + * chance of having multiple unassigned CPU entries + * selecting the same IRQ. + */ + start_cpu = cpumask_next(new_cpu, cpu_present_mask); + if (start_cpu == nr_cpumask_bits) + start_cpu = first_cpu; + + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "3337 Set Affinity: CPU %d " + "irq %d from id %d same " + "phys_id (%d)\n", + cpu, cpup->irq, new_cpu, cpup->phys_id); + } + } + + /* Set any unassigned cpu map entries to a IRQ on any phys_id */ + start_cpu = first_cpu; + + for_each_present_cpu(cpu) { + cpup = &phba->sli4_hba.cpu_map[cpu]; + + /* Is this entry unassigned */ + if (cpup->eq == LPFC_VECTOR_MAP_EMPTY) { + /* Mark it as IRQ not assigned by the kernel */ + cpup->flag |= LPFC_CPU_MAP_UNASSIGN; + + /* If so, find a new_cpup thats on ANY phys_id + * as the cpup. start_cpu will start where we + * left off so all unassigned entries don't get + * assigned the IRQ of the first entry. + */ + new_cpu = start_cpu; + for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) { + new_cpup = &phba->sli4_hba.cpu_map[new_cpu]; + if (!(new_cpup->flag & LPFC_CPU_MAP_UNASSIGN) && + (new_cpup->irq != LPFC_VECTOR_MAP_EMPTY)) + goto found_any; + new_cpu = cpumask_next( + new_cpu, cpu_present_mask); + if (new_cpu == nr_cpumask_bits) + new_cpu = first_cpu; + } + /* We should never leave an entry unassigned */ + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3339 Set Affinity: CPU %d " + "irq %d UNASSIGNED\n", + cpup->hdwq, cpup->irq); + continue; +found_any: + /* We found an available entry, copy the IRQ info */ + cpup->eq = new_cpup->eq; + cpup->irq = new_cpup->irq; + + /* Bump start_cpu to the next slot to minmize the + * chance of having multiple unassigned CPU entries + * selecting the same IRQ. + */ + start_cpu = cpumask_next(new_cpu, cpu_present_mask); + if (start_cpu == nr_cpumask_bits) + start_cpu = first_cpu; + + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "3338 Set Affinity: CPU %d " + "irq %d from id %d (%d/%d)\n", + cpu, cpup->irq, new_cpu, + new_cpup->phys_id, new_cpup->core_id); + } + } + + /* Finally we need to associate a hdwq with each cpu_map entry + * This will be 1 to 1 - hdwq to cpu, unless there are less + * hardware queues then CPUs. For that case we will just round-robin + * the available hardware queues as they get assigned to CPUs. 
+ */ + idx = 0; + start_cpu = 0; + for_each_present_cpu(cpu) { + cpup = &phba->sli4_hba.cpu_map[cpu]; + if (idx >= phba->cfg_hdw_queue) { + /* We need to reuse a Hardware Queue for another CPU, + * so be smart about it and pick one that has its + * IRQ/EQ mapped to the same phys_id (CPU package). + * and core_id. + */ + new_cpu = start_cpu; + for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) { + new_cpup = &phba->sli4_hba.cpu_map[new_cpu]; + if ((new_cpup->hdwq != LPFC_VECTOR_MAP_EMPTY) && + (new_cpup->phys_id == cpup->phys_id) && + (new_cpup->core_id == cpup->core_id)) + goto found_hdwq; + new_cpu = cpumask_next( + new_cpu, cpu_present_mask); + if (new_cpu == nr_cpumask_bits) + new_cpu = first_cpu; + } + + /* If we can't match both phys_id and core_id, + * settle for just a phys_id match. + */ + new_cpu = start_cpu; + for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) { + new_cpup = &phba->sli4_hba.cpu_map[new_cpu]; + if ((new_cpup->hdwq != LPFC_VECTOR_MAP_EMPTY) && + (new_cpup->phys_id == cpup->phys_id)) + goto found_hdwq; + new_cpu = cpumask_next( + new_cpu, cpu_present_mask); + if (new_cpu == nr_cpumask_bits) + new_cpu = first_cpu; + } + + /* Otherwise just round robin on cfg_hdw_queue */ + cpup->hdwq = idx % phba->cfg_hdw_queue; + goto logit; +found_hdwq: + /* We found an available entry, copy the IRQ info */ + start_cpu = cpumask_next(new_cpu, cpu_present_mask); + if (start_cpu == nr_cpumask_bits) + start_cpu = first_cpu; + cpup->hdwq = new_cpup->hdwq; + } else { + /* 1 to 1, CPU to hdwq */ + cpup->hdwq = idx; + } +logit: + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3335 Set Affinity: CPU %d (phys %d core %d): " + "hdwq %d eq %d irq %d flg x%x\n", + cpu, cpup->phys_id, cpup->core_id, + cpup->hdwq, cpup->eq, cpup->irq, cpup->flag); + idx++; + } + + /* The cpu_map array will be used later during initialization + * when EQ / CQ / WQs are allocated and configured. 
+ */ return; } @@ -11331,24 +11594,43 @@ lpfc_get_sli4_parameters(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq) mbx_sli4_parameters); phba->sli4_hba.extents_in_use = bf_get(cfg_ext, mbx_sli4_parameters); phba->sli4_hba.rpi_hdrs_in_use = bf_get(cfg_hdrr, mbx_sli4_parameters); - phba->nvme_support = (bf_get(cfg_nvme, mbx_sli4_parameters) && - bf_get(cfg_xib, mbx_sli4_parameters)); - - if ((phba->cfg_enable_fc4_type == LPFC_ENABLE_FCP) || - !phba->nvme_support) { - phba->nvme_support = 0; - phba->nvmet_support = 0; - phba->cfg_nvmet_mrq = 0; - lpfc_printf_log(phba, KERN_ERR, LOG_INIT | LOG_NVME, - "6101 Disabling NVME support: " - "Not supported by firmware: %d %d\n", - bf_get(cfg_nvme, mbx_sli4_parameters), - bf_get(cfg_xib, mbx_sli4_parameters)); - - /* If firmware doesn't support NVME, just use SCSI support */ - if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP)) - return -ENODEV; - phba->cfg_enable_fc4_type = LPFC_ENABLE_FCP; + + /* Check for firmware nvme support */ + rc = (bf_get(cfg_nvme, mbx_sli4_parameters) && + bf_get(cfg_xib, mbx_sli4_parameters)); + + if (rc) { + /* Save this to indicate the Firmware supports NVME */ + sli4_params->nvme = 1; + + /* Firmware NVME support, check driver FC4 NVME support */ + if (phba->cfg_enable_fc4_type == LPFC_ENABLE_FCP) { + lpfc_printf_log(phba, KERN_INFO, LOG_INIT | LOG_NVME, + "6133 Disabling NVME support: " + "FC4 type not supported: x%x\n", + phba->cfg_enable_fc4_type); + goto fcponly; + } + } else { + /* No firmware NVME support, check driver FC4 NVME support */ + sli4_params->nvme = 0; + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT | LOG_NVME, + "6101 Disabling NVME support: Not " + "supported by firmware (%d %d) x%x\n", + bf_get(cfg_nvme, mbx_sli4_parameters), + bf_get(cfg_xib, mbx_sli4_parameters), + phba->cfg_enable_fc4_type); +fcponly: + phba->nvme_support = 0; + phba->nvmet_support = 0; + phba->cfg_nvmet_mrq = 0; + + /* If no FC4 type support, move to just SCSI support */ + if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP)) + return -ENODEV; + phba->cfg_enable_fc4_type = LPFC_ENABLE_FCP; + } } /* Only embed PBDE for if_type 6, PBDE support requires xib be set */ diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index fdd16d9f55a1..946642cee3df 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -2143,7 +2143,9 @@ lpfc_nvme_lport_unreg_wait(struct lpfc_vport *vport, struct completion *lport_unreg_cmp) { u32 wait_tmo; - int ret; + int ret, i, pending = 0; + struct lpfc_sli_ring *pring; + struct lpfc_hba *phba = vport->phba; /* Host transport has to clean up and confirm requiring an indefinite * wait. Print a message if a 10 second wait expires and renew the @@ -2153,10 +2155,18 @@ lpfc_nvme_lport_unreg_wait(struct lpfc_vport *vport, while (true) { ret = wait_for_completion_timeout(lport_unreg_cmp, wait_tmo); if (unlikely(!ret)) { + pending = 0; + for (i = 0; i < phba->cfg_hdw_queue; i++) { + pring = phba->sli4_hba.hdwq[i].nvme_wq->pring; + if (!pring) + continue; + if (pring->txcmplq_cnt) + pending += pring->txcmplq_cnt; + } lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_IOERR, "6176 Lport %p Localport %p wait " - "timed out. Renewing.\n", - lport, vport->localport); + "timed out. Pending %d. 
Renewing.\n", + lport, vport->localport, pending); continue; } break; diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 06170824a69b..d5812719de2b 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -220,19 +220,68 @@ lpfc_nvmet_cmd_template(void) /* Word 12, 13, 14, 15 - is zero */ } +#if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) +static struct lpfc_nvmet_rcv_ctx * +lpfc_nvmet_get_ctx_for_xri(struct lpfc_hba *phba, u16 xri) +{ + struct lpfc_nvmet_rcv_ctx *ctxp; + unsigned long iflag; + bool found = false; + + spin_lock_irqsave(&phba->sli4_hba.t_active_list_lock, iflag); + list_for_each_entry(ctxp, &phba->sli4_hba.t_active_ctx_list, list) { + if (ctxp->ctxbuf->sglq->sli4_xritag != xri) + continue; + + found = true; + break; + } + spin_unlock_irqrestore(&phba->sli4_hba.t_active_list_lock, iflag); + if (found) + return ctxp; + + return NULL; +} + +static struct lpfc_nvmet_rcv_ctx * +lpfc_nvmet_get_ctx_for_oxid(struct lpfc_hba *phba, u16 oxid, u32 sid) +{ + struct lpfc_nvmet_rcv_ctx *ctxp; + unsigned long iflag; + bool found = false; + + spin_lock_irqsave(&phba->sli4_hba.t_active_list_lock, iflag); + list_for_each_entry(ctxp, &phba->sli4_hba.t_active_ctx_list, list) { + if (ctxp->oxid != oxid || ctxp->sid != sid) + continue; + + found = true; + break; + } + spin_unlock_irqrestore(&phba->sli4_hba.t_active_list_lock, iflag); + if (found) + return ctxp; + + return NULL; +} +#endif + static void lpfc_nvmet_defer_release(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp) { lockdep_assert_held(&ctxp->ctxlock); lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, - "6313 NVMET Defer ctx release xri x%x flg x%x\n", + "6313 NVMET Defer ctx release oxid x%x flg x%x\n", ctxp->oxid, ctxp->flag); if (ctxp->flag & LPFC_NVMET_CTX_RLS) return; ctxp->flag |= LPFC_NVMET_CTX_RLS; + spin_lock(&phba->sli4_hba.t_active_list_lock); + list_del(&ctxp->list); + spin_unlock(&phba->sli4_hba.t_active_list_lock); spin_lock(&phba->sli4_hba.abts_nvmet_buf_list_lock); list_add_tail(&ctxp->list, &phba->sli4_hba.lpfc_abts_nvmet_ctx_list); spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock); @@ -343,16 +392,23 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf) } if (ctxp->rqb_buffer) { - nvmebuf = ctxp->rqb_buffer; spin_lock_irqsave(&ctxp->ctxlock, iflag); - ctxp->rqb_buffer = NULL; - if (ctxp->flag & LPFC_NVMET_CTX_REUSE_WQ) { - ctxp->flag &= ~LPFC_NVMET_CTX_REUSE_WQ; - spin_unlock_irqrestore(&ctxp->ctxlock, iflag); - nvmebuf->hrq->rqbp->rqb_free_buffer(phba, nvmebuf); + nvmebuf = ctxp->rqb_buffer; + /* check if freed in another path whilst acquiring lock */ + if (nvmebuf) { + ctxp->rqb_buffer = NULL; + if (ctxp->flag & LPFC_NVMET_CTX_REUSE_WQ) { + ctxp->flag &= ~LPFC_NVMET_CTX_REUSE_WQ; + spin_unlock_irqrestore(&ctxp->ctxlock, iflag); + nvmebuf->hrq->rqbp->rqb_free_buffer(phba, + nvmebuf); + } else { + spin_unlock_irqrestore(&ctxp->ctxlock, iflag); + /* repost */ + lpfc_rq_buf_free(phba, &nvmebuf->hbuf); + } } else { spin_unlock_irqrestore(&ctxp->ctxlock, iflag); - lpfc_rq_buf_free(phba, &nvmebuf->hbuf); /* repost */ } } ctxp->state = LPFC_NVMET_STE_FREE; @@ -388,8 +444,9 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf) spin_lock_init(&ctxp->ctxlock); #ifdef CONFIG_SCSI_LPFC_DEBUG_FS - if (ctxp->ts_cmd_nvme) { - ctxp->ts_cmd_nvme = ktime_get_ns(); + /* NOTE: isr time stamp is stale when context is re-assigned*/ + if (ctxp->ts_isr_cmd) { + ctxp->ts_cmd_nvme = 0; ctxp->ts_nvme_data = 0; 
ctxp->ts_data_wqput = 0; ctxp->ts_isr_data = 0; @@ -402,9 +459,7 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf) #endif atomic_inc(&tgtp->rcv_fcp_cmd_in); - /* flag new work queued, replacement buffer has already - * been reposted - */ + /* Indicate that a replacement buffer has been posted */ spin_lock_irqsave(&ctxp->ctxlock, iflag); ctxp->flag |= LPFC_NVMET_CTX_REUSE_WQ; spin_unlock_irqrestore(&ctxp->ctxlock, iflag); @@ -433,6 +488,9 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf) * Use the CPU context list, from the MRQ the IO was received on * (ctxp->idx), to save context structure. */ + spin_lock_irqsave(&phba->sli4_hba.t_active_list_lock, iflag); + list_del_init(&ctxp->list); + spin_unlock_irqrestore(&phba->sli4_hba.t_active_list_lock, iflag); cpu = raw_smp_processor_id(); infop = lpfc_get_ctx_list(phba, cpu, ctxp->idx); spin_lock_irqsave(&infop->nvmet_ctx_list_lock, iflag); @@ -700,8 +758,10 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, } lpfc_printf_log(phba, KERN_INFO, logerr, - "6315 IO Error Cmpl xri x%x: %x/%x XBUSY:x%x\n", - ctxp->oxid, status, result, ctxp->flag); + "6315 IO Error Cmpl oxid: x%x xri: x%x %x/%x " + "XBUSY:x%x\n", + ctxp->oxid, ctxp->ctxbuf->sglq->sli4_xritag, + status, result, ctxp->flag); } else { rsp->fcp_error = NVME_SC_SUCCESS; @@ -849,7 +909,6 @@ lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport, * before freeing ctxp and iocbq. */ lpfc_in_buf_free(phba, &nvmebuf->dbuf); - ctxp->rqb_buffer = 0; atomic_inc(&nvmep->xmt_ls_rsp); return 0; } @@ -922,7 +981,7 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, (ctxp->state == LPFC_NVMET_STE_ABORT)) { atomic_inc(&lpfc_nvmep->xmt_fcp_drop); lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, - "6102 IO xri x%x aborted\n", + "6102 IO oxid x%x aborted\n", ctxp->oxid); rc = -ENXIO; goto aerr; @@ -1022,7 +1081,7 @@ lpfc_nvmet_xmt_fcp_abort(struct nvmet_fc_target_port *tgtport, ctxp->hdwq = &phba->sli4_hba.hdwq[0]; lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, - "6103 NVMET Abort op: oxri x%x flg x%x ste %d\n", + "6103 NVMET Abort op: oxid x%x flg x%x ste %d\n", ctxp->oxid, ctxp->flag, ctxp->state); lpfc_nvmeio_data(phba, "NVMET FCP ABRT: xri x%x flg x%x ste x%x\n", @@ -1035,7 +1094,7 @@ lpfc_nvmet_xmt_fcp_abort(struct nvmet_fc_target_port *tgtport, /* Since iaab/iaar are NOT set, we need to check * if the firmware is in process of aborting IO */ - if (ctxp->flag & LPFC_NVMET_XBUSY) { + if (ctxp->flag & (LPFC_NVMET_XBUSY | LPFC_NVMET_ABORT_OP)) { spin_unlock_irqrestore(&ctxp->ctxlock, flags); return; } @@ -1098,6 +1157,7 @@ lpfc_nvmet_xmt_fcp_release(struct nvmet_fc_target_port *tgtport, ctxp->state, aborting); atomic_inc(&lpfc_nvmep->xmt_fcp_release); + ctxp->flag &= ~LPFC_NVMET_TNOTIFY; if (aborting) return; @@ -1122,7 +1182,7 @@ lpfc_nvmet_defer_rcv(struct nvmet_fc_target_port *tgtport, if (!nvmebuf) { lpfc_printf_log(phba, KERN_INFO, LOG_NVME_IOERR, - "6425 Defer rcv: no buffer xri x%x: " + "6425 Defer rcv: no buffer oxid x%x: " "flg %x ste %x\n", ctxp->oxid, ctxp->flag, ctxp->state); return; @@ -1514,10 +1574,12 @@ void lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba, struct sli4_wcqe_xri_aborted *axri) { +#if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) uint16_t xri = bf_get(lpfc_wcqe_xa_xri, axri); uint16_t rxid = bf_get(lpfc_wcqe_xa_remote_xid, axri); struct lpfc_nvmet_rcv_ctx *ctxp, *next_ctxp; struct lpfc_nvmet_tgtport *tgtp; + struct nvmefc_tgt_fcp_req *req = NULL; struct lpfc_nodelist *ndlp; 
unsigned long iflag = 0; int rrq_empty = 0; @@ -1548,7 +1610,7 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba, */ if (ctxp->flag & LPFC_NVMET_CTX_RLS && !(ctxp->flag & LPFC_NVMET_ABORT_OP)) { - list_del(&ctxp->list); + list_del_init(&ctxp->list); released = true; } ctxp->flag &= ~LPFC_NVMET_XBUSY; @@ -1568,7 +1630,7 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba, } lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, - "6318 XB aborted oxid %x flg x%x (%x)\n", + "6318 XB aborted oxid x%x flg x%x (%x)\n", ctxp->oxid, ctxp->flag, released); if (released) lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf); @@ -1579,6 +1641,33 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba, } spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock); spin_unlock_irqrestore(&phba->hbalock, iflag); + + ctxp = lpfc_nvmet_get_ctx_for_xri(phba, xri); + if (ctxp) { + /* + * Abort already done by FW, so BA_ACC sent. + * However, the transport may be unaware. + */ + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6323 NVMET Rcv ABTS xri x%x ctxp state x%x " + "flag x%x oxid x%x rxid x%x\n", + xri, ctxp->state, ctxp->flag, ctxp->oxid, + rxid); + + spin_lock_irqsave(&ctxp->ctxlock, iflag); + ctxp->flag |= LPFC_NVMET_ABTS_RCV; + ctxp->state = LPFC_NVMET_STE_ABORT; + spin_unlock_irqrestore(&ctxp->ctxlock, iflag); + + lpfc_nvmeio_data(phba, + "NVMET ABTS RCV: xri x%x CPU %02x rjt %d\n", + xri, raw_smp_processor_id(), 0); + + req = &ctxp->ctx.fcp_req; + if (req) + nvmet_fc_rcv_fcp_abort(phba->targetport, req); + } +#endif } int @@ -1589,19 +1678,23 @@ lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport, struct lpfc_hba *phba = vport->phba; struct lpfc_nvmet_rcv_ctx *ctxp, *next_ctxp; struct nvmefc_tgt_fcp_req *rsp; - uint16_t xri; + uint32_t sid; + uint16_t oxid, xri; unsigned long iflag = 0; - xri = be16_to_cpu(fc_hdr->fh_ox_id); + sid = sli4_sid_from_fc_hdr(fc_hdr); + oxid = be16_to_cpu(fc_hdr->fh_ox_id); spin_lock_irqsave(&phba->hbalock, iflag); spin_lock(&phba->sli4_hba.abts_nvmet_buf_list_lock); list_for_each_entry_safe(ctxp, next_ctxp, &phba->sli4_hba.lpfc_abts_nvmet_ctx_list, list) { - if (ctxp->ctxbuf->sglq->sli4_xritag != xri) + if (ctxp->oxid != oxid || ctxp->sid != sid) continue; + xri = ctxp->ctxbuf->sglq->sli4_xritag; + spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock); spin_unlock_irqrestore(&phba->hbalock, iflag); @@ -1626,11 +1719,93 @@ lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport, spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock); spin_unlock_irqrestore(&phba->hbalock, iflag); - lpfc_nvmeio_data(phba, "NVMET ABTS RCV: xri x%x CPU %02x rjt %d\n", - xri, raw_smp_processor_id(), 1); + /* check the wait list */ + if (phba->sli4_hba.nvmet_io_wait_cnt) { + struct rqb_dmabuf *nvmebuf; + struct fc_frame_header *fc_hdr_tmp; + u32 sid_tmp; + u16 oxid_tmp; + bool found = false; + + spin_lock_irqsave(&phba->sli4_hba.nvmet_io_wait_lock, iflag); + + /* match by oxid and s_id */ + list_for_each_entry(nvmebuf, + &phba->sli4_hba.lpfc_nvmet_io_wait_list, + hbuf.list) { + fc_hdr_tmp = (struct fc_frame_header *) + (nvmebuf->hbuf.virt); + oxid_tmp = be16_to_cpu(fc_hdr_tmp->fh_ox_id); + sid_tmp = sli4_sid_from_fc_hdr(fc_hdr_tmp); + if (oxid_tmp != oxid || sid_tmp != sid) + continue; + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6321 NVMET Rcv ABTS oxid x%x from x%x " + "is waiting for a ctxp\n", + oxid, sid); + + list_del_init(&nvmebuf->hbuf.list); + phba->sli4_hba.nvmet_io_wait_cnt--; + found = true; + break; + } + spin_unlock_irqrestore(&phba->sli4_hba.nvmet_io_wait_lock, + iflag); + + /* free 
buffer since already posted a new DMA buffer to RQ */ + if (found) { + nvmebuf->hrq->rqbp->rqb_free_buffer(phba, nvmebuf); + /* Respond with BA_ACC accordingly */ + lpfc_sli4_seq_abort_rsp(vport, fc_hdr, 1); + return 0; + } + } + + /* check active list */ + ctxp = lpfc_nvmet_get_ctx_for_oxid(phba, oxid, sid); + if (ctxp) { + xri = ctxp->ctxbuf->sglq->sli4_xritag; + + spin_lock_irqsave(&ctxp->ctxlock, iflag); + ctxp->flag |= (LPFC_NVMET_ABTS_RCV | LPFC_NVMET_ABORT_OP); + spin_unlock_irqrestore(&ctxp->ctxlock, iflag); + + lpfc_nvmeio_data(phba, + "NVMET ABTS RCV: xri x%x CPU %02x rjt %d\n", + xri, raw_smp_processor_id(), 0); + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6322 NVMET Rcv ABTS:acc oxid x%x xri x%x " + "flag x%x state x%x\n", + ctxp->oxid, xri, ctxp->flag, ctxp->state); + + if (ctxp->flag & LPFC_NVMET_TNOTIFY) { + /* Notify the transport */ + nvmet_fc_rcv_fcp_abort(phba->targetport, + &ctxp->ctx.fcp_req); + } else { + cancel_work_sync(&ctxp->ctxbuf->defer_work); + spin_lock_irqsave(&ctxp->ctxlock, iflag); + lpfc_nvmet_defer_release(phba, ctxp); + spin_unlock_irqrestore(&ctxp->ctxlock, iflag); + } + if (ctxp->state == LPFC_NVMET_STE_RCV) + lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, ctxp->sid, + ctxp->oxid); + else + lpfc_nvmet_sol_fcp_issue_abort(phba, ctxp, ctxp->sid, + ctxp->oxid); + + lpfc_sli4_seq_abort_rsp(vport, fc_hdr, 1); + return 0; + } + + lpfc_nvmeio_data(phba, "NVMET ABTS RCV: oxid x%x CPU %02x rjt %d\n", + oxid, raw_smp_processor_id(), 1); lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, - "6320 NVMET Rcv ABTS:rjt xri x%x\n", xri); + "6320 NVMET Rcv ABTS:rjt oxid x%x\n", oxid); /* Respond with BA_RJT accordingly */ lpfc_sli4_seq_abort_rsp(vport, fc_hdr, 0); @@ -1714,6 +1889,18 @@ lpfc_nvmet_wqfull_process(struct lpfc_hba *phba, spin_unlock_irqrestore(&pring->ring_lock, iflags); return; } + if (rc == WQE_SUCCESS) { +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + if (ctxp->ts_cmd_nvme) { + if (ctxp->ctx.fcp_req.op == NVMET_FCOP_RSP) + ctxp->ts_status_wqput = ktime_get_ns(); + else + ctxp->ts_data_wqput = ktime_get_ns(); + } +#endif + } else { + WARN_ON(rc); + } } wq->q_flag &= ~HBA_NVMET_WQFULL; spin_unlock_irqrestore(&pring->ring_lock, iflags); @@ -1879,8 +2066,20 @@ lpfc_nvmet_process_rcv_fcp_req(struct lpfc_nvmet_ctxbuf *ctx_buf) return; } + if (ctxp->flag & LPFC_NVMET_ABTS_RCV) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6324 IO oxid x%x aborted\n", + ctxp->oxid); + return; + } + payload = (uint32_t *)(nvmebuf->dbuf.virt); tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + ctxp->flag |= LPFC_NVMET_TNOTIFY; +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + if (ctxp->ts_isr_cmd) + ctxp->ts_cmd_nvme = ktime_get_ns(); +#endif /* * The calling sequence should be: * nvmet_fc_rcv_fcp_req->lpfc_nvmet_xmt_fcp_op/cmp- req->done @@ -1930,6 +2129,7 @@ lpfc_nvmet_process_rcv_fcp_req(struct lpfc_nvmet_ctxbuf *ctx_buf) phba->sli4_hba.nvmet_mrq_data[qno], 1, qno); return; } + ctxp->flag &= ~LPFC_NVMET_TNOTIFY; atomic_inc(&tgtp->rcv_fcp_cmd_drop); lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "2582 FCP Drop IO x%x: err x%x: x%x x%x x%x\n", @@ -2019,6 +2219,8 @@ lpfc_nvmet_replenish_context(struct lpfc_hba *phba, * @phba: pointer to lpfc hba data structure. * @idx: relative index of MRQ vector * @nvmebuf: pointer to lpfc nvme command HBQ data structure. + * @isr_timestamp: in jiffies. + * @cqflag: cq processing information regarding workload. * * This routine is used for processing the WQE associated with a unsolicited * event. 
It first determines whether there is an existing ndlp that matches @@ -2031,7 +2233,8 @@ static void lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, uint32_t idx, struct rqb_dmabuf *nvmebuf, - uint64_t isr_timestamp) + uint64_t isr_timestamp, + uint8_t cqflag) { struct lpfc_nvmet_rcv_ctx *ctxp; struct lpfc_nvmet_tgtport *tgtp; @@ -2118,6 +2321,9 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, sid = sli4_sid_from_fc_hdr(fc_hdr); ctxp = (struct lpfc_nvmet_rcv_ctx *)ctx_buf->context; + spin_lock_irqsave(&phba->sli4_hba.t_active_list_lock, iflag); + list_add_tail(&ctxp->list, &phba->sli4_hba.t_active_ctx_list); + spin_unlock_irqrestore(&phba->sli4_hba.t_active_list_lock, iflag); if (ctxp->state != LPFC_NVMET_STE_FREE) { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6414 NVMET Context corrupt %d %d oxid x%x\n", @@ -2140,24 +2346,41 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, spin_lock_init(&ctxp->ctxlock); #ifdef CONFIG_SCSI_LPFC_DEBUG_FS - if (isr_timestamp) { + if (isr_timestamp) ctxp->ts_isr_cmd = isr_timestamp; - ctxp->ts_cmd_nvme = ktime_get_ns(); - ctxp->ts_nvme_data = 0; - ctxp->ts_data_wqput = 0; - ctxp->ts_isr_data = 0; - ctxp->ts_data_nvme = 0; - ctxp->ts_nvme_status = 0; - ctxp->ts_status_wqput = 0; - ctxp->ts_isr_status = 0; - ctxp->ts_status_nvme = 0; - } else { - ctxp->ts_cmd_nvme = 0; - } + ctxp->ts_cmd_nvme = 0; + ctxp->ts_nvme_data = 0; + ctxp->ts_data_wqput = 0; + ctxp->ts_isr_data = 0; + ctxp->ts_data_nvme = 0; + ctxp->ts_nvme_status = 0; + ctxp->ts_status_wqput = 0; + ctxp->ts_isr_status = 0; + ctxp->ts_status_nvme = 0; #endif atomic_inc(&tgtp->rcv_fcp_cmd_in); - lpfc_nvmet_process_rcv_fcp_req(ctx_buf); + /* check for cq processing load */ + if (!cqflag) { + lpfc_nvmet_process_rcv_fcp_req(ctx_buf); + return; + } + + if (!queue_work(phba->wq, &ctx_buf->defer_work)) { + atomic_inc(&tgtp->rcv_fcp_cmd_drop); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME, + "6325 Unable to queue work for oxid x%x. " + "FCP Drop IO [x%x x%x x%x]\n", + ctxp->oxid, + atomic_read(&tgtp->rcv_fcp_cmd_in), + atomic_read(&tgtp->rcv_fcp_cmd_out), + atomic_read(&tgtp->xmt_fcp_release)); + + spin_lock_irqsave(&ctxp->ctxlock, iflag); + lpfc_nvmet_defer_release(phba, ctxp); + spin_unlock_irqrestore(&ctxp->ctxlock, iflag); + lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, sid, oxid); + } } /** @@ -2194,6 +2417,8 @@ lpfc_nvmet_unsol_ls_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, * @phba: pointer to lpfc hba data structure. * @idx: relative index of MRQ vector * @nvmebuf: pointer to received nvme data structure. + * @isr_timestamp: in jiffies. + * @cqflag: cq processing information regarding workload. * * This routine is used to process an unsolicited event received from a SLI * (Service Level Interface) ring. 
The actual processing of the data buffer @@ -2205,14 +2430,14 @@ void lpfc_nvmet_unsol_fcp_event(struct lpfc_hba *phba, uint32_t idx, struct rqb_dmabuf *nvmebuf, - uint64_t isr_timestamp) + uint64_t isr_timestamp, + uint8_t cqflag) { if (phba->nvmet_support == 0) { lpfc_rq_buf_free(phba, &nvmebuf->hbuf); return; } - lpfc_nvmet_unsol_fcp_buffer(phba, idx, nvmebuf, - isr_timestamp); + lpfc_nvmet_unsol_fcp_buffer(phba, idx, nvmebuf, isr_timestamp, cqflag); } /** @@ -2750,7 +2975,7 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, if ((ctxp->flag & LPFC_NVMET_CTX_RLS) && !(ctxp->flag & LPFC_NVMET_XBUSY)) { spin_lock(&phba->sli4_hba.abts_nvmet_buf_list_lock); - list_del(&ctxp->list); + list_del_init(&ctxp->list); spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock); released = true; } @@ -2759,7 +2984,7 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, atomic_inc(&tgtp->xmt_abort_rsp); lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, - "6165 ABORT cmpl: xri x%x flg x%x (%d) " + "6165 ABORT cmpl: oxid x%x flg x%x (%d) " "WCQE: %08x %08x %08x %08x\n", ctxp->oxid, ctxp->flag, released, wcqe->word0, wcqe->total_data_placed, @@ -2834,7 +3059,7 @@ lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, if ((ctxp->flag & LPFC_NVMET_CTX_RLS) && !(ctxp->flag & LPFC_NVMET_XBUSY)) { spin_lock(&phba->sli4_hba.abts_nvmet_buf_list_lock); - list_del(&ctxp->list); + list_del_init(&ctxp->list); spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock); released = true; } @@ -2843,7 +3068,7 @@ lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, atomic_inc(&tgtp->xmt_abort_rsp); lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, - "6316 ABTS cmpl xri x%x flg x%x (%x) " + "6316 ABTS cmpl oxid x%x flg x%x (%x) " "WCQE: %08x %08x %08x %08x\n", ctxp->oxid, ctxp->flag, released, wcqe->word0, wcqe->total_data_placed, @@ -3214,7 +3439,7 @@ aerr: spin_lock_irqsave(&ctxp->ctxlock, flags); if (ctxp->flag & LPFC_NVMET_CTX_RLS) { spin_lock(&phba->sli4_hba.abts_nvmet_buf_list_lock); - list_del(&ctxp->list); + list_del_init(&ctxp->list); spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock); released = true; } @@ -3223,8 +3448,9 @@ aerr: atomic_inc(&tgtp->xmt_abort_rsp_error); lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, - "6135 Failed to Issue ABTS for oxid x%x. Status x%x\n", - ctxp->oxid, rc); + "6135 Failed to Issue ABTS for oxid x%x. 
Status x%x " + "(%x)\n", + ctxp->oxid, rc, released); if (released) lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf); return 1; diff --git a/drivers/scsi/lpfc/lpfc_nvmet.h b/drivers/scsi/lpfc/lpfc_nvmet.h index 2f3f603d94c4..8ff67deac10a 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.h +++ b/drivers/scsi/lpfc/lpfc_nvmet.h @@ -140,6 +140,7 @@ struct lpfc_nvmet_rcv_ctx { #define LPFC_NVMET_ABTS_RCV 0x10 /* ABTS received on exchange */ #define LPFC_NVMET_CTX_REUSE_WQ 0x20 /* ctx reused via WQ */ #define LPFC_NVMET_DEFER_WQFULL 0x40 /* Waiting on a free WQE */ +#define LPFC_NVMET_TNOTIFY 0x80 /* notify transport of abts */ struct rqb_dmabuf *rqb_buffer; struct lpfc_nvmet_ctxbuf *ctxbuf; struct lpfc_sli4_hdw_queue *hdwq; diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index ba996fbde89b..f9df800e7067 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -3879,10 +3879,8 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn, */ spin_lock(&lpfc_cmd->buf_lock); lpfc_cmd->cur_iocbq.iocb_flag &= ~LPFC_DRIVER_ABORTED; - if (lpfc_cmd->waitq) { + if (lpfc_cmd->waitq) wake_up(lpfc_cmd->waitq); - lpfc_cmd->waitq = NULL; - } spin_unlock(&lpfc_cmd->buf_lock); lpfc_release_scsi_buf(phba, lpfc_cmd); @@ -4718,6 +4716,9 @@ wait_for_cmpl: iocb->sli4_xritag, ret, cmnd->device->id, cmnd->device->lun); } + + lpfc_cmd->waitq = NULL; + spin_unlock(&lpfc_cmd->buf_lock); goto out; @@ -4797,7 +4798,12 @@ lpfc_check_fcp_rsp(struct lpfc_vport *vport, struct lpfc_io_buf *lpfc_cmd) rsp_info, rsp_len, rsp_info_code); - if ((fcprsp->rspStatus2&RSP_LEN_VALID) && (rsp_len == 8)) { + /* If FCP_RSP_LEN_VALID bit is one, then the FCP_RSP_LEN + * field specifies the number of valid bytes of FCP_RSP_INFO. + * The FCP_RSP_LEN field shall be set to 0x04 or 0x08 + */ + if ((fcprsp->rspStatus2 & RSP_LEN_VALID) && + ((rsp_len == 8) || (rsp_len == 4))) { switch (rsp_info_code) { case RSP_NO_FAILURE: lpfc_printf_vlog(vport, KERN_INFO, LOG_FCP, @@ -5741,7 +5747,7 @@ lpfc_enable_oas_lun(struct lpfc_hba *phba, struct lpfc_name *vport_wwpn, /* Create an lun info structure and add to list of luns */ lun_info = lpfc_create_device_data(phba, vport_wwpn, target_wwpn, lun, - pri, false); + pri, true); if (lun_info) { lun_info->oas_enabled = true; lun_info->priority = pri; diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 4329cc44bb55..f9e6a135d656 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -108,7 +108,7 @@ lpfc_get_iocb_from_iocbq(struct lpfc_iocbq *iocbq) * endianness. This function can be called with or without * lock. 
**/ -void +static void lpfc_sli4_pcimem_bcopy(void *srcp, void *destp, uint32_t cnt) { uint64_t *src = srcp; @@ -5571,6 +5571,7 @@ lpfc_sli4_arm_cqeq_intr(struct lpfc_hba *phba) int qidx; struct lpfc_sli4_hba *sli4_hba = &phba->sli4_hba; struct lpfc_sli4_hdw_queue *qp; + struct lpfc_queue *eq; sli4_hba->sli4_write_cq_db(phba, sli4_hba->mbx_cq, 0, LPFC_QUEUE_REARM); sli4_hba->sli4_write_cq_db(phba, sli4_hba->els_cq, 0, LPFC_QUEUE_REARM); @@ -5578,18 +5579,24 @@ lpfc_sli4_arm_cqeq_intr(struct lpfc_hba *phba) sli4_hba->sli4_write_cq_db(phba, sli4_hba->nvmels_cq, 0, LPFC_QUEUE_REARM); - qp = sli4_hba->hdwq; if (sli4_hba->hdwq) { + /* Loop thru all Hardware Queues */ for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) { - sli4_hba->sli4_write_cq_db(phba, qp[qidx].fcp_cq, 0, + qp = &sli4_hba->hdwq[qidx]; + /* ARM the corresponding CQ */ + sli4_hba->sli4_write_cq_db(phba, qp->fcp_cq, 0, LPFC_QUEUE_REARM); - sli4_hba->sli4_write_cq_db(phba, qp[qidx].nvme_cq, 0, + sli4_hba->sli4_write_cq_db(phba, qp->nvme_cq, 0, LPFC_QUEUE_REARM); } - for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) - sli4_hba->sli4_write_eq_db(phba, qp[qidx].hba_eq, - 0, LPFC_QUEUE_REARM); + /* Loop thru all IRQ vectors */ + for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) { + eq = sli4_hba->hba_eq_hdl[qidx].eq; + /* ARM the corresponding EQ */ + sli4_hba->sli4_write_eq_db(phba, eq, + 0, LPFC_QUEUE_REARM); + } } if (phba->nvmet_support) { @@ -7875,26 +7882,28 @@ lpfc_sli4_mbox_completions_pending(struct lpfc_hba *phba) * and will process all the completions associated with the eq for the * mailbox completion queue. **/ -bool +static bool lpfc_sli4_process_missed_mbox_completions(struct lpfc_hba *phba) { struct lpfc_sli4_hba *sli4_hba = &phba->sli4_hba; uint32_t eqidx; struct lpfc_queue *fpeq = NULL; + struct lpfc_queue *eq; bool mbox_pending; if (unlikely(!phba) || (phba->sli_rev != LPFC_SLI_REV4)) return false; - /* Find the eq associated with the mcq */ - - if (sli4_hba->hdwq) - for (eqidx = 0; eqidx < phba->cfg_irq_chann; eqidx++) - if (sli4_hba->hdwq[eqidx].hba_eq->queue_id == - sli4_hba->mbx_cq->assoc_qid) { - fpeq = sli4_hba->hdwq[eqidx].hba_eq; + /* Find the EQ associated with the mbox CQ */ + if (sli4_hba->hdwq) { + for (eqidx = 0; eqidx < phba->cfg_irq_chann; eqidx++) { + eq = phba->sli4_hba.hba_eq_hdl[eqidx].eq; + if (eq->queue_id == sli4_hba->mbx_cq->assoc_qid) { + fpeq = eq; break; } + } + } if (!fpeq) return false; @@ -13605,14 +13614,9 @@ __lpfc_sli4_process_cq(struct lpfc_hba *phba, struct lpfc_queue *cq, goto rearm_and_exit; /* Process all the entries to the CQ */ + cq->q_flag = 0; cqe = lpfc_sli4_cq_get(cq); while (cqe) { -#if defined(CONFIG_SCSI_LPFC_DEBUG_FS) && defined(BUILD_NVME) - if (phba->ktime_on) - cq->isr_timestamp = ktime_get_ns(); - else - cq->isr_timestamp = 0; -#endif workposted |= handler(phba, cq, cqe); __lpfc_sli4_consume_cqe(phba, cq, cqe); @@ -13626,6 +13630,9 @@ __lpfc_sli4_process_cq(struct lpfc_hba *phba, struct lpfc_queue *cq, consumed = 0; } + if (count == LPFC_NVMET_CQ_NOTIFY) + cq->q_flag |= HBA_NVMET_CQ_NOTIFY; + cqe = lpfc_sli4_cq_get(cq); } if (count >= phba->cfg_cq_poll_threshold) { @@ -13941,10 +13948,10 @@ lpfc_sli4_nvmet_handle_rcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, goto drop; if (fc_hdr->fh_type == FC_TYPE_FCP) { - dma_buf->bytes_recv = bf_get(lpfc_rcqe_length, rcqe); + dma_buf->bytes_recv = bf_get(lpfc_rcqe_length, rcqe); lpfc_nvmet_unsol_fcp_event( - phba, idx, dma_buf, - cq->isr_timestamp); + phba, idx, dma_buf, cq->isr_timestamp, + cq->q_flag & 
HBA_NVMET_CQ_NOTIFY); return false; } drop: @@ -14110,6 +14117,12 @@ process_cq: } work_cq: +#if defined(CONFIG_SCSI_LPFC_DEBUG_FS) + if (phba->ktime_on) + cq->isr_timestamp = ktime_get_ns(); + else + cq->isr_timestamp = 0; +#endif if (!queue_work_on(cq->chann, phba->wq, &cq->irqwork)) lpfc_printf_log(phba, KERN_ERR, LOG_SLI, "0363 Cannot schedule soft IRQ " @@ -14236,7 +14249,7 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id) return IRQ_NONE; /* Get to the EQ struct associated with this vector */ - fpeq = phba->sli4_hba.hdwq[hba_eqidx].hba_eq; + fpeq = phba->sli4_hba.hba_eq_hdl[hba_eqidx].eq; if (unlikely(!fpeq)) return IRQ_NONE; @@ -14521,7 +14534,7 @@ lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq, /* set values by EQ_DELAY register if supported */ if (phba->sli.sli_flag & LPFC_SLI_USE_EQDR) { for (qidx = startq; qidx < phba->cfg_irq_chann; qidx++) { - eq = phba->sli4_hba.hdwq[qidx].hba_eq; + eq = phba->sli4_hba.hba_eq_hdl[qidx].eq; if (!eq) continue; @@ -14530,7 +14543,6 @@ lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq, if (++cnt >= numq) break; } - return; } @@ -14558,7 +14570,7 @@ lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq, dmult = LPFC_DMULT_MAX; for (qidx = startq; qidx < phba->cfg_irq_chann; qidx++) { - eq = phba->sli4_hba.hdwq[qidx].hba_eq; + eq = phba->sli4_hba.hba_eq_hdl[qidx].eq; if (!eq) continue; eq->q_mode = usdelay; @@ -14660,8 +14672,10 @@ lpfc_eq_create(struct lpfc_hba *phba, struct lpfc_queue *eq, uint32_t imax) lpfc_printf_log(phba, KERN_ERR, LOG_SLI, "0360 Unsupported EQ count. (%d)\n", eq->entry_count); - if (eq->entry_count < 256) - return -EINVAL; + if (eq->entry_count < 256) { + status = -EINVAL; + goto out; + } /* fall through - otherwise default to smallest count */ case 256: bf_set(lpfc_eq_context_count, &eq_create->u.request.context, @@ -14713,7 +14727,7 @@ lpfc_eq_create(struct lpfc_hba *phba, struct lpfc_queue *eq, uint32_t imax) eq->host_index = 0; eq->notify_interval = LPFC_EQ_NOTIFY_INTRVL; eq->max_proc_limit = LPFC_EQ_MAX_PROC_LIMIT; - +out: mempool_free(mbox, phba->mbox_mem_pool); return status; } diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index 8e4fd1a98023..3aeca387b22a 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -197,6 +197,8 @@ struct lpfc_queue { #define LPFC_DB_LIST_FORMAT 0x02 uint8_t q_flag; #define HBA_NVMET_WQFULL 0x1 /* We hit WQ Full condition for NVMET */ +#define HBA_NVMET_CQ_NOTIFY 0x1 /* LPFC_NVMET_CQ_NOTIFY CQEs this EQE */ +#define LPFC_NVMET_CQ_NOTIFY 4 void __iomem *db_regaddr; uint16_t dpp_enable; uint16_t dpp_id; @@ -450,6 +452,7 @@ struct lpfc_hba_eq_hdl { uint32_t idx; char handler_name[LPFC_SLI4_HANDLER_NAME_SZ]; struct lpfc_hba *phba; + struct lpfc_queue *eq; }; /*BB Credit recovery value*/ @@ -512,6 +515,7 @@ struct lpfc_pc_sli4_params { #define LPFC_WQ_SZ64_SUPPORT 1 #define LPFC_WQ_SZ128_SUPPORT 2 uint8_t wqpcnt; + uint8_t nvme; }; #define LPFC_CQ_4K_PAGE_SZ 0x1 @@ -546,7 +550,10 @@ struct lpfc_vector_map_info { uint16_t irq; uint16_t eq; uint16_t hdwq; - uint16_t hyper; + uint16_t flag; +#define LPFC_CPU_MAP_HYPER 0x1 +#define LPFC_CPU_MAP_UNASSIGN 0x2 +#define LPFC_CPU_FIRST_IRQ 0x4 }; #define LPFC_VECTOR_MAP_EMPTY 0xffff @@ -843,6 +850,8 @@ struct lpfc_sli4_hba { struct list_head lpfc_nvmet_sgl_list; spinlock_t abts_nvmet_buf_list_lock; /* list of aborted NVMET IOs */ struct list_head lpfc_abts_nvmet_ctx_list; + spinlock_t t_active_list_lock; /* list of active NVMET IOs */ + struct list_head 
t_active_ctx_list; struct list_head lpfc_nvmet_io_wait_list; struct lpfc_nvmet_ctx_info *nvmet_ctx_info; struct lpfc_sglq **lpfc_sglq_active_list; diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h index 220a932fe943..f7e93aaf1e00 100644 --- a/drivers/scsi/lpfc/lpfc_version.h +++ b/drivers/scsi/lpfc/lpfc_version.h @@ -20,7 +20,7 @@ * included with this package. * *******************************************************************/ -#define LPFC_DRIVER_VERSION "12.2.0.2" +#define LPFC_DRIVER_VERSION "12.2.0.3" #define LPFC_DRIVER_NAME "lpfc" /* Used for SLI 2/3 */ |