scsi: qla2xxx: Change abort wait_loop from msleep to wait_event_timeout
authorGiridhar Malavali <gmalavali@marvell.com>
Tue, 2 Apr 2019 21:24:33 +0000 (14:24 -0700)
committerMartin K. Petersen <martin.petersen@oracle.com>
Thu, 4 Apr 2019 03:45:59 +0000 (23:45 -0400)
This patch converts driver wait time from using msleep to
wair_event_timeout to prevent race condition.

Signed-off-by: Giridhar Malavali <gmalavali@marvell.com>
Signed-off-by: Himanshu Madhani <hmadhani@marvell.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/qla2xxx/qla_def.h
drivers/scsi/qla2xxx/qla_os.c

index 8d28ea9b0b73a3aa47691be5947002b47e223b45..b7b0a3e0ecbc1543b179051fe070b6df733754ce 100644 (file)
@@ -546,6 +546,7 @@ typedef struct srb {
        int rc;
        int retry_count;
        struct completion comp;
+       wait_queue_head_t *cwaitq;
        union {
                struct srb_iocb iocb_cmd;
                struct bsg_job *bsg_job;
@@ -4794,5 +4795,4 @@ struct sff_8247_a0 {
 #include "qla_gbl.h"
 #include "qla_dbg.h"
 #include "qla_inline.h"
-
 #endif
index d49208c16928b9aa3db4121d3f836a9a8a24c380..57e1041e4ca28af8f51f19cac5263f0f9d22186e 100644 (file)
@@ -726,7 +726,7 @@ qla2x00_sp_free_dma(void *ptr)
        }
 
        if (!ctx)
-               goto end;
+               return;
 
        if (sp->flags & SRB_CRC_CTX_DSD_VALID) {
                /* List assured to be having elements */
@@ -751,12 +751,6 @@ qla2x00_sp_free_dma(void *ptr)
                ha->gbl_dsd_avail += ctx1->dsd_use_cnt;
                mempool_free(ctx1, ha->ctx_mempool);
        }
-
-end:
-       if (sp->type != SRB_NVME_CMD && sp->type != SRB_NVME_LS) {
-               CMD_SP(cmd) = NULL;
-               qla2x00_rel_sp(sp);
-       }
 }
 
 void
@@ -764,6 +758,7 @@ qla2x00_sp_compl(void *ptr, int res)
 {
        srb_t *sp = ptr;
        struct scsi_cmnd *cmd = GET_CMD_SP(sp);
+       wait_queue_head_t *cwaitq = sp->cwaitq;
 
        if (atomic_read(&sp->ref_count) == 0) {
                ql_dbg(ql_dbg_io, sp->vha, 0x3015,
@@ -778,7 +773,11 @@ qla2x00_sp_compl(void *ptr, int res)
 
        sp->free(sp);
        cmd->result = res;
+       CMD_SP(cmd) = NULL;
        cmd->scsi_done(cmd);
+       if (cwaitq)
+               wake_up(cwaitq);
+       qla2x00_rel_sp(sp);
 }
 
 void
@@ -801,7 +800,7 @@ qla2xxx_qpair_sp_free_dma(void *ptr)
        }
 
        if (!ctx)
-               goto end;
+               return;
 
        if (sp->flags & SRB_CRC_CTX_DSD_VALID) {
                /* List assured to be having elements */
@@ -861,10 +860,6 @@ qla2xxx_qpair_sp_free_dma(void *ptr)
                }
                sp->flags &= ~SRB_DIF_BUNDL_DMA_VALID;
        }
-
-end:
-       CMD_SP(cmd) = NULL;
-       qla2xxx_rel_qpair_sp(sp->qpair, sp);
 }
 
 void
@@ -872,8 +867,7 @@ qla2xxx_qpair_sp_compl(void *ptr, int res)
 {
        srb_t *sp = ptr;
        struct scsi_cmnd *cmd = GET_CMD_SP(sp);
-
-       cmd->result = res;
+       wait_queue_head_t *cwaitq = sp->cwaitq;
 
        if (atomic_read(&sp->ref_count) == 0) {
                ql_dbg(ql_dbg_io, sp->fcport->vha, 0x3079,
@@ -887,7 +881,12 @@ qla2xxx_qpair_sp_compl(void *ptr, int res)
                return;
 
        sp->free(sp);
+       cmd->result = res;
+       CMD_SP(cmd) = NULL;
        cmd->scsi_done(cmd);
+       if (cwaitq)
+               wake_up(cwaitq);
+       qla2xxx_rel_qpair_sp(sp->qpair, sp);
 }
 
 /* If we are SP1 here, we need to still take and release the host_lock as SP1
@@ -1377,7 +1376,6 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
            vha->host_no, id, lun, sp, cmd, sp->handle);
 
        /* Get a reference to the sp and drop the lock.*/
-
        rval = ha->isp_ops->abort_command(sp);
        if (rval) {
                if (rval == QLA_FUNCTION_PARAMETER_ERROR)
@@ -1394,37 +1392,46 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
        }
 
        spin_lock_irqsave(qpair->qp_lock_ptr, flags);
-       /*
-        * Clear the slot in the oustanding_cmds array if we can't find the
-        * command to reclaim the resources.
-        */
-       if (rval == QLA_FUNCTION_PARAMETER_ERROR)
-               vha->req->outstanding_cmds[sp->handle] = NULL;
 
        /*
-        * sp->done will do ref_count--
-        * sp_get() took an extra count above
+        * Releasing of the SRB and associated command resources
+        * is managed through ref_count.
+        * Whether we need to wait for the abort completion or complete
+        * the abort handler should be based on the ref_count.
         */
-       sp->done(sp, DID_RESET << 16);
-
-       /* Did the command return during mailbox execution? */
-       if (ret == FAILED && !CMD_SP(cmd))
-               ret = SUCCESS;
+       if (atomic_read(&sp->ref_count) > 1) {
+               /*
+                * The command is not yet completed. We need to wait for either
+                * command completion or abort completion.
+                */
+               DECLARE_WAIT_QUEUE_HEAD_ONSTACK(eh_waitq);
+               uint32_t ratov = ha->r_a_tov/10;
 
-       if (!CMD_SP(cmd))
-               wait = 0;
+               /* Go ahead and release the extra ref_count obtained earlier */
+               sp->done(sp, DID_RESET << 16);
+               sp->cwaitq = &eh_waitq;
 
-       spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
-
-       /* Wait for the command to be returned. */
-       if (wait) {
-               if (qla2x00_eh_wait_on_command(cmd) != QLA_SUCCESS) {
-                       ql_log(ql_log_warn, vha, 0x8006,
-                           "Abort handler timed out cmd=%p.\n", cmd);
+               if (!wait_event_lock_irq_timeout(eh_waitq,
+                   CMD_SP(cmd) == NULL, *qpair->qp_lock_ptr,
+                   msecs_to_jiffies(4 * ratov * 1000))) {
+                       /*
+                        * The abort got dropped, LOGO will be sent and the
+                        * original command will be completed with CS_TIMEOUT
+                        * completion
+                        */
+                       ql_dbg(ql_dbg_taskm, vha, 0xffff,
+                           "%s: Abort wait timer (4 * R_A_TOV[%d]) expired\n",
+                           __func__, ha->r_a_tov);
+                       sp->cwaitq = NULL;
                        ret = FAILED;
+                       goto end;
                }
+       } else {
+               /* Command completed while processing the abort */
+               sp->done(sp, DID_RESET << 16);
        }
-
+end:
+       spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
        ql_log(ql_log_info, vha, 0x801c,
            "Abort command issued nexus=%ld:%d:%llu --  %d %x.\n",
            vha->host_no, id, lun, wait, ret);