[SCSI] ibmvscsi: requeue while CRQ closed
author Robert Jennings <rcj@linux.vnet.ibm.com>
Mon, 12 Nov 2007 15:00:23 +0000 (09:00 -0600)
committer James Bottomley <James.Bottomley@HansenPartnership.com>
Sat, 12 Jan 2008 00:22:45 +0000 (18:22 -0600)
When a CRQ send fails with H_CLOSED, ibmvscsi_send_srp_event() should
return SCSI_MLQUEUE_HOST_BUSY until firmware alerts the client of a CRQ
transport event.  The transport event will either reinitialize and
requeue the requests, or fail and return the I/O with DID_ERROR.

To avoid failing the eh_* functions while re-attaching to the server
adapter, the handlers retry for a bounded period (init_timeout seconds)
for as long as ibmvscsi_send_srp_event() returns SCSI_MLQUEUE_HOST_BUSY.

In ibmvscsi_eh_abort_handler() the loop includes the search of the
event list, because the lock on the hostdata is dropped while waiting to
try again after ibmvscsi_send_srp_event() fails.  While the lock is
dropped, the event could have been purged if a login was in progress
when the function was called, so it must be looked up again on each retry.

In ibmvscsi_eh_device_reset_handler() the loop includes the call to
get_event_struct() because a failing call to ibmvscsi_send_srp_event()
will have freed the event struct.

Signed-off-by: Robert Jennings <rcj@linux.vnet.ibm.com>
Signed-off-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
drivers/scsi/ibmvscsi/ibmvscsi.c

index 50120c8c164b7fae2e24c36b96e220449c34a6b8..30819012898fca80eb828e81ed2a3aa2c3d308cd 100644 (file)
@@ -629,6 +629,16 @@ static int ibmvscsi_send_srp_event(struct srp_event_struct *evt_struct,
                list_del(&evt_struct->list);
                del_timer(&evt_struct->timer);
 
+               /* If send_crq returns H_CLOSED, return SCSI_MLQUEUE_HOST_BUSY.
+                * Firmware will send a CRQ with a transport event (0xFF) to
+                * tell this client what has happened to the transport.  This
+                * will be handled in ibmvscsi_handle_crq()
+                */
+               if (rc == H_CLOSED) {
+                       dev_warn(hostdata->dev, "send warning. "
+                                "Receive queue closed, will retry.\n");
+                       goto send_busy;
+               }
                dev_err(hostdata->dev, "send error %d\n", rc);
                atomic_inc(&hostdata->request_limit);
                goto send_error;
@@ -976,58 +986,74 @@ static int ibmvscsi_eh_abort_handler(struct scsi_cmnd *cmd)
        int rsp_rc;
        unsigned long flags;
        u16 lun = lun_from_dev(cmd->device);
+       unsigned long wait_switch = 0;
 
        /* First, find this command in our sent list so we can figure
         * out the correct tag
         */
        spin_lock_irqsave(hostdata->host->host_lock, flags);
-       found_evt = NULL;
-       list_for_each_entry(tmp_evt, &hostdata->sent, list) {
-               if (tmp_evt->cmnd == cmd) {
-                       found_evt = tmp_evt;
-                       break;
+       wait_switch = jiffies + (init_timeout * HZ);
+       do {
+               found_evt = NULL;
+               list_for_each_entry(tmp_evt, &hostdata->sent, list) {
+                       if (tmp_evt->cmnd == cmd) {
+                               found_evt = tmp_evt;
+                               break;
+                       }
                }
-       }
 
-       if (!found_evt) {
-               spin_unlock_irqrestore(hostdata->host->host_lock, flags);
-               return SUCCESS;
-       }
+               if (!found_evt) {
+                       spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+                       return SUCCESS;
+               }
 
-       evt = get_event_struct(&hostdata->pool);
-       if (evt == NULL) {
-               spin_unlock_irqrestore(hostdata->host->host_lock, flags);
-               sdev_printk(KERN_ERR, cmd->device, "failed to allocate abort event\n");
-               return FAILED;
-       }
+               evt = get_event_struct(&hostdata->pool);
+               if (evt == NULL) {
+                       spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+                       sdev_printk(KERN_ERR, cmd->device,
+                               "failed to allocate abort event\n");
+                       return FAILED;
+               }
        
-       init_event_struct(evt,
-                         sync_completion,
-                         VIOSRP_SRP_FORMAT,
-                         init_timeout);
+               init_event_struct(evt,
+                                 sync_completion,
+                                 VIOSRP_SRP_FORMAT,
+                                 init_timeout);
 
-       tsk_mgmt = &evt->iu.srp.tsk_mgmt;
+               tsk_mgmt = &evt->iu.srp.tsk_mgmt;
        
-       /* Set up an abort SRP command */
-       memset(tsk_mgmt, 0x00, sizeof(*tsk_mgmt));
-       tsk_mgmt->opcode = SRP_TSK_MGMT;
-       tsk_mgmt->lun = ((u64) lun) << 48;
-       tsk_mgmt->tsk_mgmt_func = SRP_TSK_ABORT_TASK;
-       tsk_mgmt->task_tag = (u64) found_evt;
-
-       sdev_printk(KERN_INFO, cmd->device, "aborting command. lun 0x%lx, tag 0x%lx\n",
-                   tsk_mgmt->lun, tsk_mgmt->task_tag);
-
-       evt->sync_srp = &srp_rsp;
-       init_completion(&evt->comp);
-       rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, init_timeout * 2);
+               /* Set up an abort SRP command */
+               memset(tsk_mgmt, 0x00, sizeof(*tsk_mgmt));
+               tsk_mgmt->opcode = SRP_TSK_MGMT;
+               tsk_mgmt->lun = ((u64) lun) << 48;
+               tsk_mgmt->tsk_mgmt_func = SRP_TSK_ABORT_TASK;
+               tsk_mgmt->task_tag = (u64) found_evt;
+
+               evt->sync_srp = &srp_rsp;
+
+               init_completion(&evt->comp);
+               rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, init_timeout * 2);
+
+               if (rsp_rc != SCSI_MLQUEUE_HOST_BUSY)
+                       break;
+
+               spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+               msleep(10);
+               spin_lock_irqsave(hostdata->host->host_lock, flags);
+       } while (time_before(jiffies, wait_switch));
+
        spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+
        if (rsp_rc != 0) {
                sdev_printk(KERN_ERR, cmd->device,
                            "failed to send abort() event. rc=%d\n", rsp_rc);
                return FAILED;
        }
 
+       sdev_printk(KERN_INFO, cmd->device,
+                    "aborting command. lun 0x%lx, tag 0x%lx\n",
+                   (((u64) lun) << 48), (u64) found_evt);
+
        wait_for_completion(&evt->comp);
 
        /* make sure we got a good response */
@@ -1099,41 +1125,56 @@ static int ibmvscsi_eh_device_reset_handler(struct scsi_cmnd *cmd)
        int rsp_rc;
        unsigned long flags;
        u16 lun = lun_from_dev(cmd->device);
+       unsigned long wait_switch = 0;
 
        spin_lock_irqsave(hostdata->host->host_lock, flags);
-       evt = get_event_struct(&hostdata->pool);
-       if (evt == NULL) {
-               spin_unlock_irqrestore(hostdata->host->host_lock, flags);
-               sdev_printk(KERN_ERR, cmd->device, "failed to allocate reset event\n");
-               return FAILED;
-       }
+       wait_switch = jiffies + (init_timeout * HZ);
+       do {
+               evt = get_event_struct(&hostdata->pool);
+               if (evt == NULL) {
+                       spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+                       sdev_printk(KERN_ERR, cmd->device,
+                               "failed to allocate reset event\n");
+                       return FAILED;
+               }
        
-       init_event_struct(evt,
-                         sync_completion,
-                         VIOSRP_SRP_FORMAT,
-                         init_timeout);
+               init_event_struct(evt,
+                                 sync_completion,
+                                 VIOSRP_SRP_FORMAT,
+                                 init_timeout);
 
-       tsk_mgmt = &evt->iu.srp.tsk_mgmt;
+               tsk_mgmt = &evt->iu.srp.tsk_mgmt;
 
-       /* Set up a lun reset SRP command */
-       memset(tsk_mgmt, 0x00, sizeof(*tsk_mgmt));
-       tsk_mgmt->opcode = SRP_TSK_MGMT;
-       tsk_mgmt->lun = ((u64) lun) << 48;
-       tsk_mgmt->tsk_mgmt_func = SRP_TSK_LUN_RESET;
+               /* Set up a lun reset SRP command */
+               memset(tsk_mgmt, 0x00, sizeof(*tsk_mgmt));
+               tsk_mgmt->opcode = SRP_TSK_MGMT;
+               tsk_mgmt->lun = ((u64) lun) << 48;
+               tsk_mgmt->tsk_mgmt_func = SRP_TSK_LUN_RESET;
 
-       sdev_printk(KERN_INFO, cmd->device, "resetting device. lun 0x%lx\n",
-                   tsk_mgmt->lun);
+               evt->sync_srp = &srp_rsp;
+
+               init_completion(&evt->comp);
+               rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, init_timeout * 2);
+
+               if (rsp_rc != SCSI_MLQUEUE_HOST_BUSY)
+                       break;
+
+               spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+               msleep(10);
+               spin_lock_irqsave(hostdata->host->host_lock, flags);
+       } while (time_before(jiffies, wait_switch));
 
-       evt->sync_srp = &srp_rsp;
-       init_completion(&evt->comp);
-       rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, init_timeout * 2);
        spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+
        if (rsp_rc != 0) {
                sdev_printk(KERN_ERR, cmd->device,
                            "failed to send reset event. rc=%d\n", rsp_rc);
                return FAILED;
        }
 
+       sdev_printk(KERN_INFO, cmd->device, "resetting device. lun 0x%lx\n",
+                   (((u64) lun) << 48));
+
        wait_for_completion(&evt->comp);
 
        /* make sure we got a good response */