[POWERPC] spufs: make spu page faults not block scheduling

author Arnd Bergmann <arnd.bergmann@de.ibm.com>

Mon, 23 Apr 2007 19:08:15 +0000 (21:08 +0200)

committer Arnd Bergmann <arnd@klappe.arndb.de>

Mon, 23 Apr 2007 19:18:55 +0000 (21:18 +0200)
author Arnd Bergmann <arnd.bergmann@de.ibm.com>
Mon, 23 Apr 2007 19:08:15 +0000 (21:08 +0200)
committer Arnd Bergmann <arnd@klappe.arndb.de>
Mon, 23 Apr 2007 19:18:55 +0000 (21:18 +0200)
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c

index 6242f3c19f68f0562fa6ccd9f0fc704738bd4b2c..31fa55f33415e5c84b423c1a25f2e2e5abead409 100644 (file)
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -290,7 +290,6 @@ spu_irq_class_1(int irq, void *data)
  
         return stat ? IRQ_HANDLED : IRQ_NONE;
  }
-EXPORT_SYMBOL_GPL(spu_irq_class_1_bottom);
  
  static irqreturn_t
  spu_irq_class_2(int irq, void *data)
@@ -462,108 +461,6 @@ void spu_free(struct spu *spu)
  }
  EXPORT_SYMBOL_GPL(spu_free);
  
-static int spu_handle_mm_fault(struct spu *spu)
-{
-       struct mm_struct *mm = spu->mm;
-       struct vm_area_struct *vma;
-       u64 ea, dsisr, is_write;
-       int ret;
-
-       ea = spu->dar;
-       dsisr = spu->dsisr;
-#if 0
-       if (!IS_VALID_EA(ea)) {
-               return -EFAULT;
-       }
-#endif /* XXX */
-       if (mm == NULL) {
-               return -EFAULT;
-       }
-       if (mm->pgd == NULL) {
-               return -EFAULT;
-       }
-
-       down_read(&mm->mmap_sem);
-       vma = find_vma(mm, ea);
-       if (!vma)
-               goto bad_area;
-       if (vma->vm_start <= ea)
-               goto good_area;
-       if (!(vma->vm_flags & VM_GROWSDOWN))
-               goto bad_area;
-#if 0
-       if (expand_stack(vma, ea))
-               goto bad_area;
-#endif /* XXX */
-good_area:
-       is_write = dsisr & MFC_DSISR_ACCESS_PUT;
-       if (is_write) {
-               if (!(vma->vm_flags & VM_WRITE))
-                       goto bad_area;
-       } else {
-               if (dsisr & MFC_DSISR_ACCESS_DENIED)
-                       goto bad_area;
-               if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
-                       goto bad_area;
-       }
-       ret = 0;
-       switch (handle_mm_fault(mm, vma, ea, is_write)) {
-       case VM_FAULT_MINOR:
-               current->min_flt++;
-               break;
-       case VM_FAULT_MAJOR:
-               current->maj_flt++;
-               break;
-       case VM_FAULT_SIGBUS:
-               ret = -EFAULT;
-               goto bad_area;
-       case VM_FAULT_OOM:
-               ret = -ENOMEM;
-               goto bad_area;
-       default:
-               BUG();
-       }
-       up_read(&mm->mmap_sem);
-       return ret;
-
-bad_area:
-       up_read(&mm->mmap_sem);
-       return -EFAULT;
-}
-
-int spu_irq_class_1_bottom(struct spu *spu)
-{
-       u64 ea, dsisr, access, error = 0UL;
-       int ret = 0;
-
-       ea = spu->dar;
-       dsisr = spu->dsisr;
-       if (dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED)) {
-               u64 flags;
-
-               access = (_PAGE_PRESENT | _PAGE_USER);
-               access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_RW : 0UL;
-               local_irq_save(flags);
-               if (hash_page(ea, access, 0x300) != 0)
-                       error |= CLASS1_ENABLE_STORAGE_FAULT_INTR;
-               local_irq_restore(flags);
-       }
-       if (error & CLASS1_ENABLE_STORAGE_FAULT_INTR) {
-               if ((ret = spu_handle_mm_fault(spu)) != 0)
-                       error |= CLASS1_ENABLE_STORAGE_FAULT_INTR;
-               else
-                       error &= ~CLASS1_ENABLE_STORAGE_FAULT_INTR;
-       }
-       spu->dar = 0UL;
-       spu->dsisr = 0UL;
-       if (!error) {
-               spu_restart_dma(spu);
-       } else {
-               spu->dma_callback(spu, SPE_EVENT_SPE_DATA_STORAGE);
-       }
-       return ret;
-}
-
  struct sysdev_class spu_sysdev_class = {
         set_kset_name("spu")
  };
diff --git a/arch/powerpc/platforms/cell/spufs/Makefile b/arch/powerpc/platforms/cell/spufs/Makefile

index 472217d19faf5b7ac0dd7d352cbeb77510d57513..2cd89c11af5ac328209cdbcf7a0b76dcdd7c196c 100644 (file)
--- a/arch/powerpc/platforms/cell/spufs/Makefile
+++ b/arch/powerpc/platforms/cell/spufs/Makefile
@@ -1,4 +1,4 @@
-obj-y += switch.o
+obj-y += switch.o fault.o
  
  obj-$(CONFIG_SPU_FS) += spufs.o
  spufs-y += inode.o file.o context.o syscalls.o coredump.o
diff --git a/arch/powerpc/platforms/cell/spufs/backing_ops.c b/arch/powerpc/platforms/cell/spufs/backing_ops.c

index 1898f0d3a8b876aaafe246c845b82189c298986c..3322528fa6eb687c4f25965e9966cb6f7b364a49 100644 (file)
--- a/arch/powerpc/platforms/cell/spufs/backing_ops.c
+++ b/arch/powerpc/platforms/cell/spufs/backing_ops.c
@@ -350,6 +350,11 @@ static int spu_backing_send_mfc_command(struct spu_context *ctx,
         return ret;
  }
  
+static void spu_backing_restart_dma(struct spu_context *ctx)
+{
+       /* nothing to do here */
+}
+
  struct spu_context_ops spu_backing_ops = {
         .mbox_read = spu_backing_mbox_read,
         .mbox_stat_read = spu_backing_mbox_stat_read,
@@ -376,4 +381,5 @@ struct spu_context_ops spu_backing_ops = {
         .read_mfc_tagstatus = spu_backing_read_mfc_tagstatus,
         .get_mfc_free_elements = spu_backing_get_mfc_free_elements,
         .send_mfc_command = spu_backing_send_mfc_command,
+       .restart_dma = spu_backing_restart_dma,
  };
diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c

new file mode 100644 (file)

index 0000000..182dc91
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/fault.c
@@ -0,0 +1,193 @@
+/*
+ * Low-level SPU handling
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+
+#include "spufs.h"
+
+/*
+ * This ought to be kept in sync with the powerpc specific do_page_fault
+ * function. Fortunately, there are a few corner cases that we have not
+ * had to handle so far.
+ */
+static int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea, unsigned long dsisr)
+{
+       struct vm_area_struct *vma;
+       unsigned long is_write;
+       int ret;
+
+#if 0
+       if (!IS_VALID_EA(ea)) {
+               return -EFAULT;
+       }
+#endif /* XXX */
+       if (mm == NULL) {
+               return -EFAULT;
+       }
+       if (mm->pgd == NULL) {
+               return -EFAULT;
+       }
+
+       down_read(&mm->mmap_sem);
+       vma = find_vma(mm, ea);
+       if (!vma)
+               goto bad_area;
+       if (vma->vm_start <= ea)
+               goto good_area;
+       if (!(vma->vm_flags & VM_GROWSDOWN))
+               goto bad_area;
+       if (expand_stack(vma, ea))
+               goto bad_area;
+good_area:
+       is_write = dsisr & MFC_DSISR_ACCESS_PUT;
+       if (is_write) {
+               if (!(vma->vm_flags & VM_WRITE))
+                       goto bad_area;
+       } else {
+               if (dsisr & MFC_DSISR_ACCESS_DENIED)
+                       goto bad_area;
+               if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+                       goto bad_area;
+       }
+       ret = 0;
+       switch (handle_mm_fault(mm, vma, ea, is_write)) {
+       case VM_FAULT_MINOR:
+               current->min_flt++;
+               break;
+       case VM_FAULT_MAJOR:
+               current->maj_flt++;
+               break;
+       case VM_FAULT_SIGBUS:
+               ret = -EFAULT;
+               goto bad_area;
+       case VM_FAULT_OOM:
+               ret = -ENOMEM;
+               goto bad_area;
+       default:
+               BUG();
+       }
+       up_read(&mm->mmap_sem);
+       return ret;
+
+bad_area:
+       up_read(&mm->mmap_sem);
+       return -EFAULT;
+}
+
+static void spufs_handle_dma_error(struct spu_context *ctx, int type)
+{
+       if (ctx->flags & SPU_CREATE_EVENTS_ENABLED) {
+               ctx->event_return |= type;
+               wake_up_all(&ctx->stop_wq);
+       } else {
+               switch (type) {
+               case SPE_EVENT_DMA_ALIGNMENT:
+               case SPE_EVENT_SPE_DATA_STORAGE:
+               case SPE_EVENT_INVALID_DMA:
+                       force_sig(SIGBUS, /* info, */ current);
+                       break;
+               case SPE_EVENT_SPE_ERROR:
+                       force_sig(SIGILL, /* info */ current);
+                       break;
+               }
+       }
+}
+
+void spufs_dma_callback(struct spu *spu, int type)
+{
+       spufs_handle_dma_error(spu->ctx, type);
+}
+EXPORT_SYMBOL_GPL(spufs_dma_callback);
+
+/*
+ * bottom half handler for page faults, we can't do this from
+ * interrupt context, since we might need to sleep.
+ * we also need to give up the mutex so we can get scheduled
+ * out while waiting for the backing store.
+ *
+ * TODO: try calling hash_page from the interrupt handler first
+ *       in order to speed up the easy case.
+ */
+int spufs_handle_class1(struct spu_context *ctx)
+{
+       u64 ea, dsisr, access;
+       unsigned long flags;
+       int ret;
+
+       /*
+        * dar and dsisr get passed from the registers
+        * to the spu_context, to this function, but not
+        * back to the spu if it gets scheduled again.
+        *
+        * if we don't handle the fault for a saved context
+        * in time, we can still expect to get the same fault
+        * immediately after the context restore.
+        */
+       if (ctx->state == SPU_STATE_RUNNABLE) {
+               ea = ctx->spu->dar;
+               dsisr = ctx->spu->dsisr;
+               ctx->spu->dar= ctx->spu->dsisr = 0;
+       } else {
+               ea = ctx->csa.priv1.mfc_dar_RW;
+               dsisr = ctx->csa.priv1.mfc_dsisr_RW;
+               ctx->csa.priv1.mfc_dar_RW = 0;
+               ctx->csa.priv1.mfc_dsisr_RW = 0;
+       }
+
+       if (!(dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED)))
+               return 0;
+
+       pr_debug("ctx %p: ea %016lx, dsisr %016lx state %d\n", ctx, ea,
+               dsisr, ctx->state);
+
+       /* we must not hold the lock when entering spu_handle_mm_fault */
+       spu_release(ctx);
+
+       access = (_PAGE_PRESENT | _PAGE_USER);
+       access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_RW : 0UL;
+       local_irq_save(flags);
+       ret = hash_page(ea, access, 0x300);
+       local_irq_restore(flags);
+
+       /* hashing failed, so try the actual fault handler */
+       if (ret)
+               ret = spu_handle_mm_fault(current->mm, ea, dsisr);
+
+       spu_acquire(ctx);
+       /*
+        * If we handled the fault successfully and are in runnable
+        * state, restart the DMA.
+        * In case of unhandled error report the problem to user space.
+        */
+       if (!ret) {
+               if (ctx->spu)
+                       ctx->ops->restart_dma(ctx);
+       } else
+               spufs_handle_dma_error(ctx, SPE_EVENT_SPE_DATA_STORAGE);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(spufs_handle_class1);
diff --git a/arch/powerpc/platforms/cell/spufs/hw_ops.c b/arch/powerpc/platforms/cell/spufs/hw_ops.c

index ae42e03b8c8687f1b7ff183bc371f33164280e09..428875c5e4ecec35559316967d37c18cdf01a9d9 100644 (file)
--- a/arch/powerpc/platforms/cell/spufs/hw_ops.c
+++ b/arch/powerpc/platforms/cell/spufs/hw_ops.c
@@ -296,6 +296,14 @@ static int spu_hw_send_mfc_command(struct spu_context *ctx,
         }
  }
  
+static void spu_hw_restart_dma(struct spu_context *ctx)
+{
+       struct spu_priv2 __iomem *priv2 = ctx->spu->priv2;
+
+       if (!test_bit(SPU_CONTEXT_SWITCH_PENDING, &ctx->spu->flags))
+               out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESTART_DMA_COMMAND);
+}
+
  struct spu_context_ops spu_hw_ops = {
         .mbox_read = spu_hw_mbox_read,
         .mbox_stat_read = spu_hw_mbox_stat_read,
@@ -320,4 +328,5 @@ struct spu_context_ops spu_hw_ops = {
         .read_mfc_tagstatus = spu_hw_read_mfc_tagstatus,
         .get_mfc_free_elements = spu_hw_get_mfc_free_elements,
         .send_mfc_command = spu_hw_send_mfc_command,
+       .restart_dma = spu_hw_restart_dma,
  };
diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c

index 7df5202c9a90acf94b9321a9a0d392d01b3bc2b7..1a8195bf75d5c0523822f0788345f7318afdeda4 100644 (file)
--- a/arch/powerpc/platforms/cell/spufs/run.c
+++ b/arch/powerpc/platforms/cell/spufs/run.c
@@ -18,27 +18,6 @@ void spufs_stop_callback(struct spu *spu)
         wake_up_all(&ctx->stop_wq);
  }
  
-void spufs_dma_callback(struct spu *spu, int type)
-{
-       struct spu_context *ctx = spu->ctx;
-
-       if (ctx->flags & SPU_CREATE_EVENTS_ENABLED) {
-               ctx->event_return |= type;
-               wake_up_all(&ctx->stop_wq);
-       } else {
-               switch (type) {
-               case SPE_EVENT_DMA_ALIGNMENT:
-               case SPE_EVENT_SPE_DATA_STORAGE:
-               case SPE_EVENT_INVALID_DMA:
-                       force_sig(SIGBUS, /* info, */ current);
-                       break;
-               case SPE_EVENT_SPE_ERROR:
-                       force_sig(SIGILL, /* info */ current);
-                       break;
-               }
-       }
-}
-
  static inline int spu_stopped(struct spu_context *ctx, u32 * stat)
  {
         struct spu *spu;
@@ -294,11 +273,8 @@ int spu_process_callback(struct spu_context *ctx)
  static inline int spu_process_events(struct spu_context *ctx)
  {
         struct spu *spu = ctx->spu;
-       u64 pte_fault = MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED;
         int ret = 0;
  
-       if (spu->dsisr & pte_fault)
-               ret = spu_irq_class_1_bottom(spu);
         if (spu->class_0_pending)
                 ret = spu_irq_class_0_bottom(spu);
         if (!ret && signal_pending(current))
@@ -332,6 +308,10 @@ long spufs_run_spu(struct file *file, struct spu_context *ctx,
                                 break;
                         status &= ~SPU_STATUS_STOPPED_BY_STOP;
                 }
+               ret = spufs_handle_class1(ctx);
+               if (ret)
+                       break;
+
                 if (unlikely(ctx->state != SPU_STATE_RUNNABLE)) {
                         ret = spu_reacquire_runnable(ctx, npc, &status);
                         if (ret) {
diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h

index cae2ad435b0ab4cfb38736e797dcafba9631bc3e..9993c9b3cffcf7f60bc68d4f8304b78a98ccc1b4 100644 (file)
--- a/arch/powerpc/platforms/cell/spufs/spufs.h
+++ b/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -141,6 +141,7 @@ struct spu_context_ops {
                                struct spu_dma_info * info);
         void (*proxydma_info_read) (struct spu_context * ctx,
                                     struct spu_proxydma_info * info);
+       void (*restart_dma)(struct spu_context *ctx);
  };
  
  extern struct spu_context_ops spu_hw_ops;
@@ -172,6 +173,9 @@ int put_spu_gang(struct spu_gang *gang);
  void spu_gang_remove_ctx(struct spu_gang *gang, struct spu_context *ctx);
  void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx);
  
+/* fault handling */
+int spufs_handle_class1(struct spu_context *ctx);
+
  /* context management */
  static inline void spu_acquire(struct spu_context *ctx)
  {
diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c

index fd91c73de34e0b4913b93f93aa3222d089ea898d..8347c4a3f894a4bd647cc64b128a6a9089126dda 100644 (file)
--- a/arch/powerpc/platforms/cell/spufs/switch.c
+++ b/arch/powerpc/platforms/cell/spufs/switch.c
@@ -2084,6 +2084,10 @@ int spu_save(struct spu_state *prev, struct spu *spu)
         int rc;
  
         acquire_spu_lock(spu);          /* Step 1.     */
+       prev->dar = spu->dar;
+       prev->dsisr = spu->dsisr;
+       spu->dar = 0;
+       spu->dsisr = 0;
         rc = __do_spu_save(prev, spu);  /* Steps 2-53. */
         release_spu_lock(spu);
         if (rc != 0 && rc != 2 && rc != 6) {
@@ -2109,9 +2113,9 @@ int spu_restore(struct spu_state *new, struct spu *spu)
  
         acquire_spu_lock(spu);
         harvest(NULL, spu);
-       spu->dar = 0;
-       spu->dsisr = 0;
         spu->slb_replace = 0;
+       new->dar = 0;
+       new->dsisr = 0;
         spu->class_0_pending = 0;
         rc = __do_spu_restore(new, spu);
         release_spu_lock(spu);
diff --git a/include/asm-powerpc/mmu.h b/include/asm-powerpc/mmu.h

index 200055a4b82b5d983504e22df1de7550a3b104eb..e22fd8811505037da9090223883301a4f62a5484 100644 (file)
--- a/include/asm-powerpc/mmu.h
+++ b/include/asm-powerpc/mmu.h
@@ -234,6 +234,7 @@ extern int __hash_page_64K(unsigned long ea, unsigned long access,
                            unsigned long vsid, pte_t *ptep, unsigned long trap,
                            unsigned int local);
  struct mm_struct;
+extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap);
  extern int hash_huge_page(struct mm_struct *mm, unsigned long access,
                           unsigned long ea, unsigned long vsid, int local,
                           unsigned long trap);
diff --git a/include/asm-powerpc/spu_csa.h b/include/asm-powerpc/spu_csa.h

index 8aad0619eb8e5a43e7365bf8a5e27199e5fc264d..02e56a6685a24baf9216336bbe2c3f1c6308829e 100644 (file)
--- a/include/asm-powerpc/spu_csa.h
+++ b/include/asm-powerpc/spu_csa.h
@@ -242,6 +242,7 @@ struct spu_state {
         u64 spu_chnldata_RW[32];
         u32 spu_mailbox_data[4];
         u32 pu_mailbox_data[1];
+       u64 dar, dsisr;
         unsigned long suspend_time;
         spinlock_t register_lock;
  };
author	Arnd Bergmann <arnd.bergmann@de.ibm.com>
	Mon, 23 Apr 2007 19:08:15 +0000 (21:08 +0200)
committer	Arnd Bergmann <arnd@klappe.arndb.de>
	Mon, 23 Apr 2007 19:18:55 +0000 (21:18 +0200)
arch/powerpc/platforms/cell/spu_base.c		patch \| blob \| history
arch/powerpc/platforms/cell/spufs/Makefile		patch \| blob \| history
arch/powerpc/platforms/cell/spufs/backing_ops.c		patch \| blob \| history
arch/powerpc/platforms/cell/spufs/fault.c	[new file with mode: 0644]	patch \| blob
arch/powerpc/platforms/cell/spufs/hw_ops.c		patch \| blob \| history
arch/powerpc/platforms/cell/spufs/run.c		patch \| blob \| history
arch/powerpc/platforms/cell/spufs/spufs.h		patch \| blob \| history
arch/powerpc/platforms/cell/spufs/switch.c		patch \| blob \| history
include/asm-powerpc/mmu.h		patch \| blob \| history
include/asm-powerpc/spu_csa.h		patch \| blob \| history