scsi: cxlflash: Handle spurious interrupts
author: Uma Krishnan <ukrishn@linux.vnet.ibm.com>
Mon, 26 Mar 2018 16:35:42 +0000 (11:35 -0500)
committer: Martin K. Petersen <martin.petersen@oracle.com>
Wed, 18 Apr 2018 23:32:51 +0000 (19:32 -0400)
The following Oops can occur when there is heavy I/O traffic and the host is
reset by a tool such as sg_reset.

[c000200fff3fbc90] [c00800001690117c] process_cmd_doneq+0x104/0x500
                                       [cxlflash] (unreliable)
[c000200fff3fbd80] [c008000016901648] cxlflash_rrq_irq+0xd0/0x150 [cxlflash]
[c000200fff3fbde0] [c000000000193130] __handle_irq_event_percpu+0xa0/0x310
[c000200fff3fbea0] [c0000000001933d8] handle_irq_event_percpu+0x38/0x90
[c000200fff3fbee0] [c000000000193494] handle_irq_event+0x64/0xb0
[c000200fff3fbf10] [c000000000198ea0] handle_fasteoi_irq+0xc0/0x230
[c000200fff3fbf40] [c00000000019182c] generic_handle_irq+0x4c/0x70
[c000200fff3fbf60] [c00000000001794c] __do_irq+0x7c/0x1c0
[c000200fff3fbf90] [c00000000002a390] call_do_irq+0x14/0x24
[c000200e5828fab0] [c000000000017b2c] do_IRQ+0x9c/0x130
[c000200e5828fb00] [c000000000009b04] h_virt_irq_common+0x114/0x120

When a context is reset, the pending commands are flushed and the AFU is
notified. Before the AFU handles this request there could be command
completion interrupts queued to PHB which are yet to be delivered to the
context. In this scenario, a context could receive an interrupt for a command
that has been flushed, leading to a possible crash when the memory for the
flushed command is accessed.

To resolve this problem, a boolean will indicate if the hardware queue is
ready to process interrupts or not. This can be evaluated in the interrupt
handler before processing an interrupt.

Signed-off-by: Uma Krishnan <ukrishn@linux.vnet.ibm.com>
Acked-by: Matthew R. Ochs <mrochs@linux.vnet.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/cxlflash/common.h
drivers/scsi/cxlflash/main.c

index fffa2c1699ca62f0958bb0ce13b47df10afcd09f..89240b84745c84ebe14e201efc28abf832bfcd3e 100644 (file)
@@ -224,6 +224,7 @@ struct hwq {
        u64 *hrrq_end;
        u64 *hrrq_curr;
        bool toggle;
+       bool hrrq_online;
 
        s64 room;
 
index c9203282d943621b0d770ee46e40cb768cbc0a3e..a24d7e6e51c10b45779f09fc6e6e5782ffe215f5 100644 (file)
@@ -801,6 +801,10 @@ static void term_mc(struct cxlflash_cfg *cfg, u32 index)
                WARN_ON(cfg->ops->release_context(hwq->ctx_cookie));
        hwq->ctx_cookie = NULL;
 
+       spin_lock_irqsave(&hwq->hrrq_slock, lock_flags);
+       hwq->hrrq_online = false;
+       spin_unlock_irqrestore(&hwq->hrrq_slock, lock_flags);
+
        spin_lock_irqsave(&hwq->hsq_slock, lock_flags);
        flush_pending_cmds(hwq);
        spin_unlock_irqrestore(&hwq->hsq_slock, lock_flags);
@@ -1475,6 +1479,12 @@ static irqreturn_t cxlflash_rrq_irq(int irq, void *data)
 
        spin_lock_irqsave(&hwq->hrrq_slock, hrrq_flags);
 
+       /* Silently drop spurious interrupts when queue is not online */
+       if (!hwq->hrrq_online) {
+               spin_unlock_irqrestore(&hwq->hrrq_slock, hrrq_flags);
+               return IRQ_HANDLED;
+       }
+
        if (afu_is_irqpoll_enabled(afu)) {
                irq_poll_sched(&hwq->irqpoll);
                spin_unlock_irqrestore(&hwq->hrrq_slock, hrrq_flags);
@@ -1781,6 +1791,7 @@ static int init_global(struct cxlflash_cfg *cfg)
 
                writeq_be((u64) hwq->hrrq_start, &hmap->rrq_start);
                writeq_be((u64) hwq->hrrq_end, &hmap->rrq_end);
+               hwq->hrrq_online = true;
 
                if (afu_is_sq_cmd_mode(afu)) {
                        writeq_be((u64)hwq->hsq_start, &hmap->sq_start);