scsi: lpfc: Fix hang when downloading fw on port enabled for nvme
author James Smart <jsmart2021@gmail.com>
Wed, 14 Aug 2019 23:56:55 +0000 (16:56 -0700)
committer Martin K. Petersen <martin.petersen@oracle.com>
Tue, 20 Aug 2019 02:41:10 +0000 (22:41 -0400)
As part of firmware download, the adapter is reset. On the adapter the
reset causes the function to stop and all outstanding io is terminated
(without responses). The reset path then starts teardown of the adapter,
starting with deregistration of the remote ports with the nvme-fc
transport. The local port is then deregistered and the driver waits for
local port deregistration. This never finishes.

The remote port deregistrations terminated the nvme controllers, causing
them to send aborts for all the outstanding io. The aborts were serviced in
the driver, but stalled due to its state. The nvme layer then stops to
reclaim its outstanding io before continuing.  The io must be returned
before the reset on the controller is deemed complete and the controller
delete performed.  The remote port deregistration won't complete until all
the controllers are terminated. And the local port deregistration won't
complete until all controllers and remote ports are terminated. Thus things
hang.

The issue is that the reset which stopped the adapter also stopped all the
responses that would drive i/o completions, and the aborts were stopped as
well, which likewise prevented i/o completions. The driver, when resetting
the adapter like this, needs to generate the completions as part of the
adapter reset so that the I/O completes (in error) and no aborts are
queued.

Fix by adding flush routines whenever the adapter port has been reset or
discovered in error. The flush routines will generate the completions for
the scsi and nvme outstanding io. The abort ios, if waiting, will be caught
and flushed as well.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/lpfc/lpfc_crtn.h
drivers/scsi/lpfc/lpfc_init.c
drivers/scsi/lpfc/lpfc_nvme.c
drivers/scsi/lpfc/lpfc_sli.c

index 68e9f96242d37ed0aee8eee0613bcb1e456cdcab..bee27bb7123c67ce1687e6203580d6a037fb3d3a 100644 (file)
@@ -595,6 +595,7 @@ void lpfc_release_io_buf(struct lpfc_hba *phba, struct lpfc_io_buf *ncmd,
                         struct lpfc_sli4_hdw_queue *qp);
 void lpfc_nvme_cmd_template(void);
 void lpfc_nvmet_cmd_template(void);
+void lpfc_nvme_cancel_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn);
 extern int lpfc_enable_nvmet_cnt;
 extern unsigned long long lpfc_enable_nvmet[];
 extern int lpfc_no_hba_reset_cnt;
index c549212a2b49f5c009e0541f044cc76136fb0eac..8c0fb9baf723c98553f876dfbe1b478248cc2855 100644 (file)
@@ -1546,6 +1546,8 @@ lpfc_sli4_offline_eratt(struct lpfc_hba *phba)
        spin_unlock_irq(&phba->hbalock);
 
        lpfc_offline_prep(phba, LPFC_MBX_NO_WAIT);
+       lpfc_sli_flush_fcp_rings(phba);
+       lpfc_sli_flush_nvme_rings(phba);
        lpfc_offline(phba);
        lpfc_hba_down_post(phba);
        lpfc_unblock_mgmt_io(phba);
@@ -1807,6 +1809,8 @@ lpfc_sli4_port_sta_fn_reset(struct lpfc_hba *phba, int mbx_action,
                                "2887 Reset Needed: Attempting Port "
                                "Recovery...\n");
        lpfc_offline_prep(phba, mbx_action);
+       lpfc_sli_flush_fcp_rings(phba);
+       lpfc_sli_flush_nvme_rings(phba);
        lpfc_offline(phba);
        /* release interrupt for possible resource change */
        lpfc_sli4_disable_intr(phba);
index 103708503592dc5c4c02a0436a7f4ad8b7bd48b5..c7f5b50c382000743c244008c71c9b8e6e8004bf 100644 (file)
@@ -2668,3 +2668,50 @@ lpfc_nvme_wait_for_io_drain(struct lpfc_hba *phba)
                }
        }
 }
+
+void
+lpfc_nvme_cancel_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn)
+{
+#if (IS_ENABLED(CONFIG_NVME_FC))  /* body is empty if CONFIG_NVME_FC is off */
+       struct lpfc_io_buf *lpfc_ncmd;
+       struct nvmefc_fcp_req *nCmd;
+       struct lpfc_nvme_fcpreq_priv *freqpriv;
+
+       if (!pwqeIn->context1) {        /* no IO buffer attached to the WQE */
+               lpfc_sli_release_iocbq(phba, pwqeIn);
+               return;
+       }
+       /* An abort WQE is only released; the IO's own iocb does the done call */
+       if (bf_get(wqe_cmnd, &pwqeIn->wqe.gen_req.wqe_com) ==
+           CMD_ABORT_XRI_CX) {
+               lpfc_sli_release_iocbq(phba, pwqeIn);
+               return;
+       }
+       lpfc_ncmd = (struct lpfc_io_buf *)pwqeIn->context1;
+
+       spin_lock(&lpfc_ncmd->buf_lock);
+       if (!lpfc_ncmd->nvmeCmd) {      /* no request left to complete */
+               spin_unlock(&lpfc_ncmd->buf_lock);
+               lpfc_release_nvme_buf(phba, lpfc_ncmd);
+               return;
+       }
+
+       nCmd = lpfc_ncmd->nvmeCmd;
+       lpfc_printf_log(phba, KERN_INFO, LOG_NVME_IOERR,
+                       "6194 NVME Cancel xri %x\n",
+                       lpfc_ncmd->cur_iocbq.sli4_xritag);
+
+       nCmd->transferred_length = 0;
+       nCmd->rcv_rsplen = 0;
+       nCmd->status = NVME_SC_INTERNAL;        /* report the IO as failed */
+       freqpriv = nCmd->private;
+       freqpriv->nvme_buf = NULL;
+       lpfc_ncmd->nvmeCmd = NULL;      /* unlink request and buffer */
+
+       spin_unlock(&lpfc_ncmd->buf_lock);
+       nCmd->done(nCmd);       /* invoked after buf_lock is dropped */
+
+       /* Call release with XB=1 to queue the IO into the abort list. */
+       lpfc_release_nvme_buf(phba, lpfc_ncmd);
+#endif /* CONFIG_NVME_FC */
+}
index be89a86f96498dde66543a44843b28e232d896c4..c8db7ec31afbcc7a7582abd36527e012e86ee6cd 100644 (file)
@@ -1391,9 +1391,12 @@ lpfc_sli_cancel_iocbs(struct lpfc_hba *phba, struct list_head *iocblist,
 
        while (!list_empty(iocblist)) {
                list_remove_head(iocblist, piocb, struct lpfc_iocbq, list);
-               if (!piocb->iocb_cmpl)
-                       lpfc_sli_release_iocbq(phba, piocb);
-               else {
+               if (!piocb->iocb_cmpl) {
+                       if (piocb->iocb_flag & LPFC_IO_NVME)
+                               lpfc_nvme_cancel_iocb(phba, piocb);
+                       else
+                               lpfc_sli_release_iocbq(phba, piocb);
+               } else {
                        piocb->iocb.ulpStatus = ulpstatus;
                        piocb->iocb.un.ulpWord[4] = ulpWord4;
                        (piocb->iocb_cmpl) (phba, piocb, piocb);