[POWERPC] spufs: make spu page faults not block scheduling
authorArnd Bergmann <arnd.bergmann@de.ibm.com>
Mon, 23 Apr 2007 19:08:15 +0000 (21:08 +0200)
committerArnd Bergmann <arnd@klappe.arndb.de>
Mon, 23 Apr 2007 19:18:55 +0000 (21:18 +0200)
Until now, we have always entered the spu page fault handler
with a mutex for the spu context held. This has multiple
bad side-effects:
- it becomes impossible to suspend the context during
  page faults
- if an spu program attempts to access its own mmio
  areas through DMA, we get an immediate livelock when
  the nopage function tries to acquire the same mutex

This patch makes the page fault logic operate on a
struct spu_context instead of a struct spu, and moves it
from spu_base.c to a new file fault.c inside of spufs.

We now also need to copy the dar and dsisr contents
of the last fault into the saved context to have it
accessible in case we schedule out the context before
activating the page fault handler.

Signed-off-by: Arnd Bergmann <arnd.bergmann@de.ibm.com>
arch/powerpc/platforms/cell/spu_base.c
arch/powerpc/platforms/cell/spufs/Makefile
arch/powerpc/platforms/cell/spufs/backing_ops.c
arch/powerpc/platforms/cell/spufs/fault.c [new file with mode: 0644]
arch/powerpc/platforms/cell/spufs/hw_ops.c
arch/powerpc/platforms/cell/spufs/run.c
arch/powerpc/platforms/cell/spufs/spufs.h
arch/powerpc/platforms/cell/spufs/switch.c
include/asm-powerpc/mmu.h
include/asm-powerpc/spu_csa.h

index 6242f3c19f68f0562fa6ccd9f0fc704738bd4b2c..31fa55f33415e5c84b423c1a25f2e2e5abead409 100644 (file)
@@ -290,7 +290,6 @@ spu_irq_class_1(int irq, void *data)
 
        return stat ? IRQ_HANDLED : IRQ_NONE;
 }
-EXPORT_SYMBOL_GPL(spu_irq_class_1_bottom);
 
 static irqreturn_t
 spu_irq_class_2(int irq, void *data)
@@ -462,108 +461,6 @@ void spu_free(struct spu *spu)
 }
 EXPORT_SYMBOL_GPL(spu_free);
 
-static int spu_handle_mm_fault(struct spu *spu)
-{
-       struct mm_struct *mm = spu->mm;
-       struct vm_area_struct *vma;
-       u64 ea, dsisr, is_write;
-       int ret;
-
-       ea = spu->dar;
-       dsisr = spu->dsisr;
-#if 0
-       if (!IS_VALID_EA(ea)) {
-               return -EFAULT;
-       }
-#endif /* XXX */
-       if (mm == NULL) {
-               return -EFAULT;
-       }
-       if (mm->pgd == NULL) {
-               return -EFAULT;
-       }
-
-       down_read(&mm->mmap_sem);
-       vma = find_vma(mm, ea);
-       if (!vma)
-               goto bad_area;
-       if (vma->vm_start <= ea)
-               goto good_area;
-       if (!(vma->vm_flags & VM_GROWSDOWN))
-               goto bad_area;
-#if 0
-       if (expand_stack(vma, ea))
-               goto bad_area;
-#endif /* XXX */
-good_area:
-       is_write = dsisr & MFC_DSISR_ACCESS_PUT;
-       if (is_write) {
-               if (!(vma->vm_flags & VM_WRITE))
-                       goto bad_area;
-       } else {
-               if (dsisr & MFC_DSISR_ACCESS_DENIED)
-                       goto bad_area;
-               if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
-                       goto bad_area;
-       }
-       ret = 0;
-       switch (handle_mm_fault(mm, vma, ea, is_write)) {
-       case VM_FAULT_MINOR:
-               current->min_flt++;
-               break;
-       case VM_FAULT_MAJOR:
-               current->maj_flt++;
-               break;
-       case VM_FAULT_SIGBUS:
-               ret = -EFAULT;
-               goto bad_area;
-       case VM_FAULT_OOM:
-               ret = -ENOMEM;
-               goto bad_area;
-       default:
-               BUG();
-       }
-       up_read(&mm->mmap_sem);
-       return ret;
-
-bad_area:
-       up_read(&mm->mmap_sem);
-       return -EFAULT;
-}
-
-int spu_irq_class_1_bottom(struct spu *spu)
-{
-       u64 ea, dsisr, access, error = 0UL;
-       int ret = 0;
-
-       ea = spu->dar;
-       dsisr = spu->dsisr;
-       if (dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED)) {
-               u64 flags;
-
-               access = (_PAGE_PRESENT | _PAGE_USER);
-               access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_RW : 0UL;
-               local_irq_save(flags);
-               if (hash_page(ea, access, 0x300) != 0)
-                       error |= CLASS1_ENABLE_STORAGE_FAULT_INTR;
-               local_irq_restore(flags);
-       }
-       if (error & CLASS1_ENABLE_STORAGE_FAULT_INTR) {
-               if ((ret = spu_handle_mm_fault(spu)) != 0)
-                       error |= CLASS1_ENABLE_STORAGE_FAULT_INTR;
-               else
-                       error &= ~CLASS1_ENABLE_STORAGE_FAULT_INTR;
-       }
-       spu->dar = 0UL;
-       spu->dsisr = 0UL;
-       if (!error) {
-               spu_restart_dma(spu);
-       } else {
-               spu->dma_callback(spu, SPE_EVENT_SPE_DATA_STORAGE);
-       }
-       return ret;
-}
-
 struct sysdev_class spu_sysdev_class = {
        set_kset_name("spu")
 };
index 472217d19faf5b7ac0dd7d352cbeb77510d57513..2cd89c11af5ac328209cdbcf7a0b76dcdd7c196c 100644 (file)
@@ -1,4 +1,4 @@
-obj-y += switch.o
+obj-y += switch.o fault.o
 
 obj-$(CONFIG_SPU_FS) += spufs.o
 spufs-y += inode.o file.o context.o syscalls.o coredump.o
index 1898f0d3a8b876aaafe246c845b82189c298986c..3322528fa6eb687c4f25965e9966cb6f7b364a49 100644 (file)
@@ -350,6 +350,11 @@ static int spu_backing_send_mfc_command(struct spu_context *ctx,
        return ret;
 }
 
+static void spu_backing_restart_dma(struct spu_context *ctx)
+{
+       /* nothing to do here */
+}
+
 struct spu_context_ops spu_backing_ops = {
        .mbox_read = spu_backing_mbox_read,
        .mbox_stat_read = spu_backing_mbox_stat_read,
@@ -376,4 +381,5 @@ struct spu_context_ops spu_backing_ops = {
        .read_mfc_tagstatus = spu_backing_read_mfc_tagstatus,
        .get_mfc_free_elements = spu_backing_get_mfc_free_elements,
        .send_mfc_command = spu_backing_send_mfc_command,
+       .restart_dma = spu_backing_restart_dma,
 };
diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c
new file mode 100644 (file)
index 0000000..182dc91
--- /dev/null
@@ -0,0 +1,193 @@
+/*
+ * Low-level SPU handling
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+
+#include "spufs.h"
+
+/*
+ * This ought to be kept in sync with the powerpc specific do_page_fault
+ * function. Currently, there are a few corner cases that we haven't had
+ * to handle fortunately.
+ */
+static int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea, unsigned long dsisr)
+{
+       struct vm_area_struct *vma;
+       unsigned long is_write;
+       int ret;
+
+#if 0
+       if (!IS_VALID_EA(ea)) {
+               return -EFAULT;
+       }
+#endif /* XXX */
+       if (mm == NULL) {
+               return -EFAULT;
+       }
+       if (mm->pgd == NULL) {
+               return -EFAULT;
+       }
+
+       down_read(&mm->mmap_sem);
+       vma = find_vma(mm, ea);
+       if (!vma)
+               goto bad_area;
+       if (vma->vm_start <= ea)
+               goto good_area;
+       if (!(vma->vm_flags & VM_GROWSDOWN))
+               goto bad_area;
+       if (expand_stack(vma, ea))
+               goto bad_area;
+good_area:
+       is_write = dsisr & MFC_DSISR_ACCESS_PUT;
+       if (is_write) {
+               if (!(vma->vm_flags & VM_WRITE))
+                       goto bad_area;
+       } else {
+               if (dsisr & MFC_DSISR_ACCESS_DENIED)
+                       goto bad_area;
+               if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+                       goto bad_area;
+       }
+       ret = 0;
+       switch (handle_mm_fault(mm, vma, ea, is_write)) {
+       case VM_FAULT_MINOR:
+               current->min_flt++;
+               break;
+       case VM_FAULT_MAJOR:
+               current->maj_flt++;
+               break;
+       case VM_FAULT_SIGBUS:
+               ret = -EFAULT;
+               goto bad_area;
+       case VM_FAULT_OOM:
+               ret = -ENOMEM;
+               goto bad_area;
+       default:
+               BUG();
+       }
+       up_read(&mm->mmap_sem);
+       return ret;
+
+bad_area:
+       up_read(&mm->mmap_sem);
+       return -EFAULT;
+}
+
+static void spufs_handle_dma_error(struct spu_context *ctx, int type)
+{
+       if (ctx->flags & SPU_CREATE_EVENTS_ENABLED) {
+               ctx->event_return |= type;
+               wake_up_all(&ctx->stop_wq);
+       } else {
+               switch (type) {
+               case SPE_EVENT_DMA_ALIGNMENT:
+               case SPE_EVENT_SPE_DATA_STORAGE:
+               case SPE_EVENT_INVALID_DMA:
+                       force_sig(SIGBUS, /* info, */ current);
+                       break;
+               case SPE_EVENT_SPE_ERROR:
+                       force_sig(SIGILL, /* info */ current);
+                       break;
+               }
+       }
+}
+
+void spufs_dma_callback(struct spu *spu, int type)
+{
+       spufs_handle_dma_error(spu->ctx, type);
+}
+EXPORT_SYMBOL_GPL(spufs_dma_callback);
+
+/*
+ * bottom half handler for page faults, we can't do this from
+ * interrupt context, since we might need to sleep.
+ * we also need to give up the mutex so we can get scheduled
+ * out while waiting for the backing store.
+ *
+ * TODO: try calling hash_page from the interrupt handler first
+ *       in order to speed up the easy case.
+ */
+int spufs_handle_class1(struct spu_context *ctx)
+{
+       u64 ea, dsisr, access;
+       unsigned long flags;
+       int ret;
+
+       /*
+        * dar and dsisr get passed from the registers
+        * to the spu_context, to this function, but not
+        * back to the spu if it gets scheduled again.
+        *
+        * if we don't handle the fault for a saved context
+        * in time, we can still expect to get the same fault
+        * the immediately after the context restore.
+        */
+       if (ctx->state == SPU_STATE_RUNNABLE) {
+               ea = ctx->spu->dar;
+               dsisr = ctx->spu->dsisr;
+               ctx->spu->dar= ctx->spu->dsisr = 0;
+       } else {
+               ea = ctx->csa.priv1.mfc_dar_RW;
+               dsisr = ctx->csa.priv1.mfc_dsisr_RW;
+               ctx->csa.priv1.mfc_dar_RW = 0;
+               ctx->csa.priv1.mfc_dsisr_RW = 0;
+       }
+
+       if (!(dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED)))
+               return 0;
+
+       pr_debug("ctx %p: ea %016lx, dsisr %016lx state %d\n", ctx, ea,
+               dsisr, ctx->state);
+
+       /* we must not hold the lock when entering spu_handle_mm_fault */
+       spu_release(ctx);
+
+       access = (_PAGE_PRESENT | _PAGE_USER);
+       access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_RW : 0UL;
+       local_irq_save(flags);
+       ret = hash_page(ea, access, 0x300);
+       local_irq_restore(flags);
+
+       /* hashing failed, so try the actual fault handler */
+       if (ret)
+               ret = spu_handle_mm_fault(current->mm, ea, dsisr);
+
+       spu_acquire(ctx);
+       /*
+        * If we handled the fault successfully and are in runnable
+        * state, restart the DMA.
+        * In case of unhandled error report the problem to user space.
+        */
+       if (!ret) {
+               if (ctx->spu)
+                       ctx->ops->restart_dma(ctx);
+       } else
+               spufs_handle_dma_error(ctx, SPE_EVENT_SPE_DATA_STORAGE);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(spufs_handle_class1);
index ae42e03b8c8687f1b7ff183bc371f33164280e09..428875c5e4ecec35559316967d37c18cdf01a9d9 100644 (file)
@@ -296,6 +296,14 @@ static int spu_hw_send_mfc_command(struct spu_context *ctx,
        }
 }
 
+static void spu_hw_restart_dma(struct spu_context *ctx)
+{
+       struct spu_priv2 __iomem *priv2 = ctx->spu->priv2;
+
+       if (!test_bit(SPU_CONTEXT_SWITCH_PENDING, &ctx->spu->flags))
+               out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESTART_DMA_COMMAND);
+}
+
 struct spu_context_ops spu_hw_ops = {
        .mbox_read = spu_hw_mbox_read,
        .mbox_stat_read = spu_hw_mbox_stat_read,
@@ -320,4 +328,5 @@ struct spu_context_ops spu_hw_ops = {
        .read_mfc_tagstatus = spu_hw_read_mfc_tagstatus,
        .get_mfc_free_elements = spu_hw_get_mfc_free_elements,
        .send_mfc_command = spu_hw_send_mfc_command,
+       .restart_dma = spu_hw_restart_dma,
 };
index 7df5202c9a90acf94b9321a9a0d392d01b3bc2b7..1a8195bf75d5c0523822f0788345f7318afdeda4 100644 (file)
@@ -18,27 +18,6 @@ void spufs_stop_callback(struct spu *spu)
        wake_up_all(&ctx->stop_wq);
 }
 
-void spufs_dma_callback(struct spu *spu, int type)
-{
-       struct spu_context *ctx = spu->ctx;
-
-       if (ctx->flags & SPU_CREATE_EVENTS_ENABLED) {
-               ctx->event_return |= type;
-               wake_up_all(&ctx->stop_wq);
-       } else {
-               switch (type) {
-               case SPE_EVENT_DMA_ALIGNMENT:
-               case SPE_EVENT_SPE_DATA_STORAGE:
-               case SPE_EVENT_INVALID_DMA:
-                       force_sig(SIGBUS, /* info, */ current);
-                       break;
-               case SPE_EVENT_SPE_ERROR:
-                       force_sig(SIGILL, /* info */ current);
-                       break;
-               }
-       }
-}
-
 static inline int spu_stopped(struct spu_context *ctx, u32 * stat)
 {
        struct spu *spu;
@@ -294,11 +273,8 @@ int spu_process_callback(struct spu_context *ctx)
 static inline int spu_process_events(struct spu_context *ctx)
 {
        struct spu *spu = ctx->spu;
-       u64 pte_fault = MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED;
        int ret = 0;
 
-       if (spu->dsisr & pte_fault)
-               ret = spu_irq_class_1_bottom(spu);
        if (spu->class_0_pending)
                ret = spu_irq_class_0_bottom(spu);
        if (!ret && signal_pending(current))
@@ -332,6 +308,10 @@ long spufs_run_spu(struct file *file, struct spu_context *ctx,
                                break;
                        status &= ~SPU_STATUS_STOPPED_BY_STOP;
                }
+               ret = spufs_handle_class1(ctx);
+               if (ret)
+                       break;
+
                if (unlikely(ctx->state != SPU_STATE_RUNNABLE)) {
                        ret = spu_reacquire_runnable(ctx, npc, &status);
                        if (ret) {
index cae2ad435b0ab4cfb38736e797dcafba9631bc3e..9993c9b3cffcf7f60bc68d4f8304b78a98ccc1b4 100644 (file)
@@ -141,6 +141,7 @@ struct spu_context_ops {
                               struct spu_dma_info * info);
        void (*proxydma_info_read) (struct spu_context * ctx,
                                    struct spu_proxydma_info * info);
+       void (*restart_dma)(struct spu_context *ctx);
 };
 
 extern struct spu_context_ops spu_hw_ops;
@@ -172,6 +173,9 @@ int put_spu_gang(struct spu_gang *gang);
 void spu_gang_remove_ctx(struct spu_gang *gang, struct spu_context *ctx);
 void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx);
 
+/* fault handling */
+int spufs_handle_class1(struct spu_context *ctx);
+
 /* context management */
 static inline void spu_acquire(struct spu_context *ctx)
 {
index fd91c73de34e0b4913b93f93aa3222d089ea898d..8347c4a3f894a4bd647cc64b128a6a9089126dda 100644 (file)
@@ -2084,6 +2084,10 @@ int spu_save(struct spu_state *prev, struct spu *spu)
        int rc;
 
        acquire_spu_lock(spu);          /* Step 1.     */
+       prev->dar = spu->dar;
+       prev->dsisr = spu->dsisr;
+       spu->dar = 0;
+       spu->dsisr = 0;
        rc = __do_spu_save(prev, spu);  /* Steps 2-53. */
        release_spu_lock(spu);
        if (rc != 0 && rc != 2 && rc != 6) {
@@ -2109,9 +2113,9 @@ int spu_restore(struct spu_state *new, struct spu *spu)
 
        acquire_spu_lock(spu);
        harvest(NULL, spu);
-       spu->dar = 0;
-       spu->dsisr = 0;
        spu->slb_replace = 0;
+       new->dar = 0;
+       new->dsisr = 0;
        spu->class_0_pending = 0;
        rc = __do_spu_restore(new, spu);
        release_spu_lock(spu);
index 200055a4b82b5d983504e22df1de7550a3b104eb..e22fd8811505037da9090223883301a4f62a5484 100644 (file)
@@ -234,6 +234,7 @@ extern int __hash_page_64K(unsigned long ea, unsigned long access,
                           unsigned long vsid, pte_t *ptep, unsigned long trap,
                           unsigned int local);
 struct mm_struct;
+extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap);
 extern int hash_huge_page(struct mm_struct *mm, unsigned long access,
                          unsigned long ea, unsigned long vsid, int local,
                          unsigned long trap);
index 8aad0619eb8e5a43e7365bf8a5e27199e5fc264d..02e56a6685a24baf9216336bbe2c3f1c6308829e 100644 (file)
@@ -242,6 +242,7 @@ struct spu_state {
        u64 spu_chnldata_RW[32];
        u32 spu_mailbox_data[4];
        u32 pu_mailbox_data[1];
+       u64 dar, dsisr;
        unsigned long suspend_time;
        spinlock_t register_lock;
 };