[PATCH] spufs: set irq affinity for running threads
author: Arnd Bergmann <arnd@arndb.de>
Thu, 5 Jan 2006 14:05:29 +0000 (14:05 +0000)
committer: Paul Mackerras <paulus@samba.org>
Mon, 9 Jan 2006 04:44:57 +0000 (15:44 +1100)
So far, all SPU-triggered interrupts always end up on
the first SMT thread, which is a bad solution.

This patch implements setting the affinity to the
CPU that was running last when entering execution on
an SPU. This should result in a significant reduction
in IPI calls and better cache locality for SPE thread
specific data.

Signed-off-by: Arnd Bergmann <arndb@de.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
arch/powerpc/platforms/cell/interrupt.c
arch/powerpc/platforms/cell/interrupt.h
arch/powerpc/platforms/cell/spu_base.c
arch/powerpc/platforms/cell/spufs/sched.c
include/asm-powerpc/spu.h

index 7fbe78a9327db993b5e386ab1d6f8f760852d917..63aa52acf4412171a483e588d70e65080672930a 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/config.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
+#include <linux/module.h>
 #include <linux/percpu.h>
 #include <linux/types.h>
 
@@ -55,6 +56,7 @@ struct iic_regs {
 
 struct iic {
        struct iic_regs __iomem *regs;
+       u8 target_id;
 };
 
 static DEFINE_PER_CPU(struct iic, iic);
@@ -172,12 +174,11 @@ int iic_get_irq(struct pt_regs *regs)
        return irq;
 }
 
-static struct iic_regs __iomem *find_iic(int cpu)
+static int setup_iic(int cpu, struct iic *iic)
 {
        struct device_node *np;
        int nodeid = cpu / 2;
        unsigned long regs;
-       struct iic_regs __iomem *iic_regs;
 
        for (np = of_find_node_by_type(NULL, "cpu");
             np;
@@ -188,20 +189,23 @@ static struct iic_regs __iomem *find_iic(int cpu)
 
        if (!np) {
                printk(KERN_WARNING "IIC: CPU %d not found\n", cpu);
-               iic_regs = NULL;
-       } else {
-               regs = *(long *)get_property(np, "iic", NULL);
-
-               /* hack until we have decided on the devtree info */
-               regs += 0x400;
-               if (cpu & 1)
-                       regs += 0x20;
-
-               printk(KERN_DEBUG "IIC for CPU %d at %lx\n", cpu, regs);
-               iic_regs = __ioremap(regs, sizeof(struct iic_regs),
-                                                _PAGE_NO_CACHE);
+               iic->regs = NULL;
+               iic->target_id = 0xff;
+               return -ENODEV;
        }
-       return iic_regs;
+
+       regs = *(long *)get_property(np, "iic", NULL);
+
+       /* hack until we have decided on the devtree info */
+       regs += 0x400;
+       if (cpu & 1)
+               regs += 0x20;
+
+       printk(KERN_DEBUG "IIC for CPU %d at %lx\n", cpu, regs);
+       iic->regs = __ioremap(regs, sizeof(struct iic_regs),
+                                        _PAGE_NO_CACHE);
+       iic->target_id = (nodeid << 4) + ((cpu & 1) ? 0xf : 0xe);
+       return 0;
 }
 
 #ifdef CONFIG_SMP
@@ -227,6 +231,12 @@ void iic_cause_IPI(int cpu, int mesg)
        out_be64(&per_cpu(iic, cpu).regs->generate, (IIC_NUM_IPIS - 1 - mesg) << 4);
 }
 
+u8 iic_get_target_id(int cpu)
+{
+       return per_cpu(iic, cpu).target_id;
+}
+EXPORT_SYMBOL_GPL(iic_get_target_id);
+
 static irqreturn_t iic_ipi_action(int irq, void *dev_id, struct pt_regs *regs)
 {
        smp_message_recv(iic_irq_to_ipi(irq), regs);
@@ -276,7 +286,7 @@ void iic_init_IRQ(void)
        irq_offset = 0;
        for_each_cpu(cpu) {
                iic = &per_cpu(iic, cpu);
-               iic->regs = find_iic(cpu);
+               setup_iic(cpu, iic);
                if (iic->regs)
                        out_be64(&iic->regs->prio, 0xff);
        }
index 37d58e6fd0c6c4f17e14166ec63e29e306df1328..a14bd38791c01f1c3e1f37f5d2a083c251c3e472 100644 (file)
@@ -54,6 +54,7 @@ extern void iic_setup_cpu(void);
 extern void iic_local_enable(void);
 extern void iic_local_disable(void);
 
+extern u8 iic_get_target_id(int cpu);
 
 extern void spider_init_IRQ(void);
 extern int spider_get_irq(unsigned long int_pending);
index 7fe3fa3da0e92a92d1e3b8232e693d00e3141e7c..d75ae03df68679e9bd05d9d97c5ef00d0b18d174 100644 (file)
@@ -507,6 +507,14 @@ int spu_irq_class_1_bottom(struct spu *spu)
        return ret;
 }
 
+void spu_irq_setaffinity(struct spu *spu, int cpu)
+{
+       u64 target = iic_get_target_id(cpu);
+       u64 route = target << 48 | target << 32 | target << 16;
+       spu_int_route_set(spu, route);
+}
+EXPORT_SYMBOL_GPL(spu_irq_setaffinity);
+
 static void __iomem * __init map_spe_prop(struct device_node *n,
                                                 const char *name)
 {
index c34198c29159318900e6c8c9bb0402a9ee93362c..963182fbd1aab5cb366f93c5781a24de6ac8940c 100644 (file)
@@ -357,6 +357,11 @@ int spu_activate(struct spu_context *ctx, u64 flags)
        if (!spu)
                return (signal_pending(current)) ? -ERESTARTSYS : -EAGAIN;
        bind_context(spu, ctx);
+       /*
+        * We're likely to wait for interrupts on the same
+        * CPU that we are now on, so send them here.
+        */
+       spu_irq_setaffinity(spu, raw_smp_processor_id());
        put_active_spu(spu);
        return 0;
 }
index 692aa60e990347c60a5d7834534918f6be199eae..38bacf2f6e0c14dc8db15c335e08b1d458d25462 100644 (file)
@@ -147,6 +147,7 @@ struct spu *spu_alloc(void);
 void spu_free(struct spu *spu);
 int spu_irq_class_0_bottom(struct spu *spu);
 int spu_irq_class_1_bottom(struct spu *spu);
+void spu_irq_setaffinity(struct spu *spu, int cpu);
 
 extern struct spufs_calls {
        asmlinkage long (*create_thread)(const char __user *name,