[SCSI] aacraid: initialization timeout
author Mark Haverkamp <markh@osdl.org>
Tue, 20 Sep 2005 19:56:50 +0000 (12:56 -0700)
committer James Bottomley <jejb@mulgrave.(none)>
Mon, 26 Sep 2005 22:46:59 +0000 (17:46 -0500)
Received from Mark Salyzyn from Adaptec.

In the rare instances where the adapter or the motherboard is
misbehaving, driver initialization or shutdown becomes problematic. By
introducing a 3-minute timeout on the first interrupt-driven command
during initialization, and on the issuance of the adapter shutdown
command during driver unload, we can resolve the lockup problems induced
by common (but rare) hardware misbehaviors.

The timeout during initialization, should it occur, is accompanied by a
message presented to the console and the logs indicating that the user
should inspect and resolve problems with interrupt routing.

Signed-off-by: Mark Haverkamp <markh@osdl.org>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
drivers/scsi/aacraid/aachba.c
drivers/scsi/aacraid/comminit.c
drivers/scsi/aacraid/commsup.c

index 85d133c40bd39d507616a771c678060c0ae0da16..a0735a247e5b729da2cf2c0f977739cc9f3ea9db 100644 (file)
@@ -692,7 +692,7 @@ int aac_get_adapter_info(struct aac_dev* dev)
                         fibptr, 
                         sizeof(*info),
                         FsaNormal, 
-                        1, 1, 
+                        -1, 1, /* First `interrupt' command uses special wait */
                         NULL, 
                         NULL);
 
index 7f11c8540eadb7d2e259b38954c63a3716ec6c57..9e054a509b41eb8404caf5e60dfe9f749155eb2a 100644 (file)
@@ -195,7 +195,7 @@ int aac_send_shutdown(struct aac_dev * dev)
                          fibctx,
                          sizeof(struct aac_close),
                          FsaNormal,
-                         1, 1,
+                         -2 /* Timeout silently */, 1,
                          NULL, NULL);
 
        if (status == 0)
index 69985b08a270944512ca313f05c9b23fdabd6bad..3b983f3ed9600bca8147e9234f04258de5a87578 100644 (file)
@@ -41,6 +41,7 @@
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_device.h>
 #include <asm/semaphore.h>
+#include <asm/delay.h>
 
 #include "aacraid.h"
 
@@ -541,7 +542,34 @@ int fib_send(u16 command, struct fib * fibptr, unsigned long size,  int priority
     
        if (wait) {
                spin_unlock_irqrestore(&fibptr->event_lock, flags);
-               down(&fibptr->event_wait);
+               /* Only set for first known interruptable command */
+               if (wait < 0) {
+                       /*
+                        * *VERY* Dangerous to time out a command, the
+                        * assumption is made that we have no hope of
+                        * functioning because an interrupt routing or other
+                        * hardware failure has occurred.
+                        */
+                       unsigned long count = 36000000L; /* 3 minutes */
+                       unsigned long qflags;
+                       while (down_trylock(&fibptr->event_wait)) {
+                               if (--count == 0) {
+                                       spin_lock_irqsave(q->lock, qflags);
+                                       q->numpending--;
+                                       list_del(&fibptr->queue);
+                                       spin_unlock_irqrestore(q->lock, qflags);
+                                       if (wait == -1) {
+                                               printk(KERN_ERR "aacraid: fib_send: first asynchronous command timed out.\n"
+                                                 "Usually a result of a PCI interrupt routing problem;\n"
+                                                 "update mother board BIOS or consider utilizing one of\n"
+                                                 "the SAFE mode kernel options (acpi, apic etc)\n");
+                                       }
+                                       return -ETIMEDOUT;
+                               }
+                               udelay(5);
+                       }
+               } else
+                       down(&fibptr->event_wait);
                if(fibptr->done == 0)
                        BUG();