[POWERPC] Cleanup SMT thread handling
authorBenjamin Herrenschmidt <benh@kernel.crashing.org>
Thu, 25 Oct 2007 05:27:44 +0000 (15:27 +1000)
committerPaul Mackerras <paulus@samba.org>
Mon, 3 Dec 2007 02:56:25 +0000 (13:56 +1100)
This cleans up the SMT thread handling, removing some hard coded
assumptions and providing a set of helpers to convert between linux
cpu numbers, thread numbers and cores.

This implementation requires the number of threads per core to be a
power of 2 and identical on all cores in the system, but it's an
implementation detail, not an API requirement and so this limitation
can be lifted in the future if anybody ever needs it.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
arch/powerpc/kernel/setup-common.c
arch/powerpc/platforms/cell/smp.c
arch/powerpc/platforms/pseries/smp.c
include/asm-powerpc/cputhreads.h [new file with mode: 0644]

index 2de00f870edc34438352ae71ed54c7013228b7fa..6adb5a1e98bb8f2a46eac766b03288ac8e4b1f7e 100644 (file)
@@ -33,6 +33,7 @@
 #include <linux/serial.h>
 #include <linux/serial_8250.h>
 #include <linux/debugfs.h>
+#include <linux/percpu.h>
 #include <asm/io.h>
 #include <asm/prom.h>
 #include <asm/processor.h>
@@ -57,6 +58,7 @@
 #include <asm/mmu.h>
 #include <asm/lmb.h>
 #include <asm/xmon.h>
+#include <asm/cputhreads.h>
 
 #include "setup.h"
 
@@ -327,6 +329,31 @@ void __init check_for_initrd(void)
 
 #ifdef CONFIG_SMP
 
+int threads_per_core, threads_shift;   /* set by cpu_init_thread_core_maps() */
+cpumask_t threads_core_mask;           /* once set: bits 0..threads_per_core-1 */
+
+/* Record the thread count per core and derive the matching shift and mask */
+static void __init cpu_init_thread_core_maps(int tpc)
+{
+       int thread;
+
+       threads_core_mask = CPU_MASK_NONE;
+       threads_per_core = tpc;
+
+       /* Only a power of 2 thread count is handled; anything else BUGs */
+       threads_shift = ilog2(tpc);
+       BUG_ON((1 << threads_shift) != tpc);
+
+       /* One bit per thread of a core, counting up from bit 0 */
+       for (thread = 0; thread < tpc; thread++)
+               cpu_set(thread, threads_core_mask);
+
+       printk(KERN_INFO "CPU maps initialized for %d thread%s per core\n",
+              tpc, tpc <= 1 ? "" : "s");
+       printk(KERN_DEBUG " (thread shift is %d)\n", threads_shift);
+}
+
+
 /**
  * setup_cpu_maps - initialize the following cpu maps:
  *                  cpu_possible_map
@@ -350,22 +377,32 @@ void __init smp_setup_cpu_maps(void)
 {
        struct device_node *dn = NULL;
        int cpu = 0;
+       int nthreads = 1;
+
+       DBG("smp_setup_cpu_maps()\n");
 
        while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < NR_CPUS) {
                const int *intserv;
-               int j, len = sizeof(u32), nthreads = 1;
+               int j, len;
+
+               DBG("  * %s...\n", dn->full_name);
 
                intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s",
                                &len);
-               if (intserv)
+               if (intserv) {
                        nthreads = len / sizeof(int);
-               else {
+                       DBG("    ibm,ppc-interrupt-server#s -> %d threads\n",
+                           nthreads);
+               } else {
+                       DBG("    no ibm,ppc-interrupt-server#s -> 1 thread\n");
                        intserv = of_get_property(dn, "reg", NULL);
                        if (!intserv)
                                intserv = &cpu; /* assume logical == phys */
                }
 
                for (j = 0; j < nthreads && cpu < NR_CPUS; j++) {
+                       DBG("    thread %d -> cpu %d (hard id %d)\n",
+                           j, cpu, intserv[j]);
                        cpu_set(cpu, cpu_present_map);
                        set_hard_smp_processor_id(cpu, intserv[j]);
                        cpu_set(cpu, cpu_possible_map);
@@ -373,6 +410,12 @@ void __init smp_setup_cpu_maps(void)
                }
        }
 
+       /* If SMT is not supported, nthreads is forced to 1 */
+       if (!cpu_has_feature(CPU_FTR_SMT)) {
+               DBG("  SMT disabled ! nthreads forced to 1\n");
+               nthreads = 1;
+       }
+
 #ifdef CONFIG_PPC64
        /*
         * On pSeries LPAR, we need to know how many cpus
@@ -395,7 +438,7 @@ void __init smp_setup_cpu_maps(void)
 
                /* Double maxcpus for processors which have SMT capability */
                if (cpu_has_feature(CPU_FTR_SMT))
-                       maxcpus *= 2;
+                       maxcpus *= nthreads;
 
                if (maxcpus > NR_CPUS) {
                        printk(KERN_WARNING
@@ -412,9 +455,16 @@ void __init smp_setup_cpu_maps(void)
        out:
                of_node_put(dn);
        }
-
        vdso_data->processorCount = num_present_cpus();
 #endif /* CONFIG_PPC64 */
+
+       /* Initialize CPU <=> thread mapping
+        *
+        * WARNING: We assume that the number of threads is the same for
+        * every CPU in the system. If that is not the case, then some code
+        * here will have to be reworked
+        */
+       cpu_init_thread_core_maps(nthreads);
 }
 
 /*
@@ -424,17 +474,19 @@ void __init smp_setup_cpu_maps(void)
  */
 void __init smp_setup_cpu_sibling_map(void)
 {
-#if defined(CONFIG_PPC64)
-       int cpu;
+#ifdef CONFIG_PPC64
+       int i, cpu, base;
 
-       /*
-        * Do the sibling map; assume only two threads per processor.
-        */
        for_each_possible_cpu(cpu) {
-               cpu_set(cpu, per_cpu(cpu_sibling_map, cpu));
-               if (cpu_has_feature(CPU_FTR_SMT))
-                       cpu_set(cpu ^ 0x1, per_cpu(cpu_sibling_map, cpu));
+               DBG("Sibling map for CPU %d:", cpu);
+               base = cpu_first_thread_in_core(cpu);
+               for (i = 0; i < threads_per_core; i++) {
+                       cpu_set(base + i, per_cpu(cpu_sibling_map, cpu));
+                       DBG(" %d", base + i);
+               }
+               DBG("\n");
        }
+
 #endif /* CONFIG_PPC64 */
 }
 #endif /* CONFIG_SMP */
index e4438456c8675c8917052ba9e95cdd8a9e828809..efb3964457b1db4451c1c75d97dbf03e2c40ca0b 100644 (file)
@@ -42,6 +42,7 @@
 #include <asm/firmware.h>
 #include <asm/system.h>
 #include <asm/rtas.h>
+#include <asm/cputhreads.h>
 
 #include "interrupt.h"
 #include <asm/udbg.h>
@@ -182,7 +183,7 @@ static int smp_cell_cpu_bootable(unsigned int nr)
         */
        if (system_state < SYSTEM_RUNNING &&
            cpu_has_feature(CPU_FTR_SMT) &&
-           !smt_enabled_at_boot && nr % 2 != 0)
+           !smt_enabled_at_boot && cpu_thread_in_core(nr) != 0)
                return 0;
 
        return 1;
index 116305b22a2bf209721e12bfca19666e6a293432..ea4c65917a648c5e4795a99a23ba87c4efaf3351 100644 (file)
@@ -46,6 +46,7 @@
 #include <asm/pSeries_reconfig.h>
 #include <asm/mpic.h>
 #include <asm/vdso_datapage.h>
+#include <asm/cputhreads.h>
 
 #include "plpar_wrappers.h"
 #include "pseries.h"
@@ -202,7 +203,7 @@ static int smp_pSeries_cpu_bootable(unsigned int nr)
         */
        if (system_state < SYSTEM_RUNNING &&
            cpu_has_feature(CPU_FTR_SMT) &&
-           !smt_enabled_at_boot && nr % 2 != 0)
+           !smt_enabled_at_boot && cpu_thread_in_core(nr) != 0)
                return 0;
 
        return 1;
diff --git a/include/asm-powerpc/cputhreads.h b/include/asm-powerpc/cputhreads.h
new file mode 100644 (file)
index 0000000..8485c28
--- /dev/null
@@ -0,0 +1,71 @@
+#ifndef _ASM_POWERPC_CPUTHREADS_H
+#define _ASM_POWERPC_CPUTHREADS_H
+
+#include <linux/cpumask.h>
+
+/*
+ * Mapping of threads to cores (plain constants when CONFIG_SMP is not set)
+ */
+
+#ifdef CONFIG_SMP
+extern int threads_per_core;           /* number of threads in each core */
+extern int threads_shift;              /* log2 of threads_per_core */
+extern cpumask_t threads_core_mask;    /* the threads of the first core */
+#else
+#define threads_per_core       1
+#define threads_shift          0
+#define threads_core_mask      (CPU_MASK_CPU0)
+#endif
+
+/* cpu_thread_mask_to_cores - Return a cpumask with one bit set per core
+ *                            hit by the argument
+ *
+ * @threads:   a cpumask of threads
+ *
+ * This function returns a cpumask which will have the first thread's bit
+ * set for each core that has at least one thread set in the argument.
+ * Each core is tested against threads_core_mask shifted up to its base.
+ * This can typically be used for things like IPI for tlb invalidations
+ * since those need to be done only once per core/TLB
+ */
+static inline cpumask_t cpu_thread_mask_to_cores(cpumask_t threads)
+{
+       cpumask_t       tmp, res;
+       int             i;
+
+       res = CPU_MASK_NONE;
+       for (i = 0; i < NR_CPUS; i += threads_per_core) {
+               cpus_shift_left(tmp, threads_core_mask, i);
+               if (cpus_intersects(threads, tmp))
+                       cpu_set(i, res);
+       }
+       return res;
+}
+
+static inline int cpu_nr_cores(void)
+{
+       return NR_CPUS >> threads_shift;        /* cores spanned by NR_CPUS threads */
+}
+
+static inline cpumask_t cpu_online_cores_map(void)
+{
+       /* cpu_online_map passed through cpu_thread_mask_to_cores() */
+       return cpu_thread_mask_to_cores(cpu_online_map);
+}
+
+static inline int cpu_thread_to_core(int cpu)
+{
+       return cpu >> threads_shift;    /* core number: drops the thread bits */
+}
+
+static inline int cpu_thread_in_core(int cpu)
+{
+       return cpu & (threads_per_core - 1);    /* thread index inside its core */
+}
+
+static inline int cpu_first_thread_in_core(int cpu)
+{
+       return cpu & ~(threads_per_core - 1);   /* cpu number of the core's thread 0 */
+}
+
+#endif /* _ASM_POWERPC_CPUTHREADS_H */
+