[SPARC64]: Refine code sequences to get the cpu id.
authorDavid S. Miller <davem@davemloft.net>
Mon, 27 Feb 2006 07:27:19 +0000 (23:27 -0800)
committerDavid S. Miller <davem@sunset.davemloft.net>
Mon, 20 Mar 2006 09:11:35 +0000 (01:11 -0800)
On uniprocessor, it's always zero so optimize that.

On SMP, the jmpl to the stub kills the return address stack in the cpu
branch prediction logic, so expand the code sequence inline and use a
code patching section to fix things up.  This also allows better and
explicit register selection, which will be taken advantage of in a
future changeset.

The hard_smp_processor_id() function is big, so do not inline it.

Fix up tests for Jalapeno to also test for Serrano chips.  These
tests want "jbus Ultra-IIIi" cases to match, so that is what we should
test for.

Signed-off-by: David S. Miller <davem@davemloft.net>
arch/sparc64/kernel/entry.S
arch/sparc64/kernel/irq.c
arch/sparc64/kernel/setup.c
arch/sparc64/kernel/smp.c
arch/sparc64/kernel/traps.c
arch/sparc64/kernel/vmlinux.lds.S
include/asm-sparc64/cpudata.h
include/asm-sparc64/head.h
include/asm-sparc64/smp.h

index 563fa4ec33f8216a2f4ca5d7444c8623677c1b41..b3511ff5d04a9c8ff2360f9d7e29b27261a65169 100644 (file)
@@ -1628,84 +1628,10 @@ __flushw_user:
 2:     retl
         nop
 
-       /* Read cpu ID from hardware, return in %g6.
-        * (callers_pc - 4) is in %g1.  Patched at boot time.
-        *
-        * Default is spitfire implementation.
-        *
-        * The instruction sequence needs to be 5 instructions
-        * in order to fit the longest implementation, which is
-        * currently starfire.
-        */
-       .align          32
-       .globl          __get_cpu_id
-__get_cpu_id:
-       ldxa            [%g0] ASI_UPA_CONFIG, %g6
-       srlx            %g6, 17, %g6
-       jmpl            %g1 + 0x4, %g0
-        and            %g6, 0x1f, %g6
-       nop
-
-__get_cpu_id_cheetah_safari:
-       ldxa            [%g0] ASI_SAFARI_CONFIG, %g6
-       srlx            %g6, 17, %g6
-       jmpl            %g1 + 0x4, %g0
-        and            %g6, 0x3ff, %g6
-       nop
-
-__get_cpu_id_cheetah_jbus:
-       ldxa            [%g0] ASI_JBUS_CONFIG, %g6
-       srlx            %g6, 17, %g6
-       jmpl            %g1 + 0x4, %g0
-        and            %g6, 0x1f, %g6
-       nop
-
-__get_cpu_id_starfire:
-       sethi           %hi(0x1fff40000d0 >> 9), %g6
-       sllx            %g6, 9, %g6
-       or              %g6, 0xd0, %g6
-       jmpl            %g1 + 0x4, %g0
-        lduwa          [%g6] ASI_PHYS_BYPASS_EC_E, %g6
-
-       .globl          per_cpu_patch
-per_cpu_patch:
-       sethi           %hi(this_is_starfire), %o0
-       lduw            [%o0 + %lo(this_is_starfire)], %o1
-       sethi           %hi(__get_cpu_id_starfire), %o0
-       brnz,pn         %o1, 10f
-        or             %o0, %lo(__get_cpu_id_starfire), %o0
-       sethi           %hi(tlb_type), %o0
-       lduw            [%o0 + %lo(tlb_type)], %o1
-       brz,pt          %o1, 11f
-        nop
-       rdpr            %ver, %o0
-       srlx            %o0, 32, %o0
-       sethi           %hi(0x003e0016), %o1
-       or              %o1, %lo(0x003e0016), %o1
-       cmp             %o0, %o1
-       sethi           %hi(__get_cpu_id_cheetah_jbus), %o0
-       be,pn           %icc, 10f
-        or             %o0, %lo(__get_cpu_id_cheetah_jbus), %o0
-       sethi           %hi(__get_cpu_id_cheetah_safari), %o0
-       or              %o0, %lo(__get_cpu_id_cheetah_safari), %o0
-10:
-       sethi           %hi(__get_cpu_id), %o1
-       or              %o1, %lo(__get_cpu_id), %o1
-       lduw            [%o0 + 0x00], %o2
-       stw             %o2, [%o1 + 0x00]
-       flush           %o1 + 0x00
-       lduw            [%o0 + 0x04], %o2
-       stw             %o2, [%o1 + 0x04]
-       flush           %o1 + 0x04
-       lduw            [%o0 + 0x08], %o2
-       stw             %o2, [%o1 + 0x08]
-       flush           %o1 + 0x08
-       lduw            [%o0 + 0x0c], %o2
-       stw             %o2, [%o1 + 0x0c]
-       flush           %o1 + 0x0c
-       lduw            [%o0 + 0x10], %o2
-       stw             %o2, [%o1 + 0x10]
-       flush           %o1 + 0x10
-11:
+#ifdef CONFIG_SMP
+       .globl          hard_smp_processor_id
+hard_smp_processor_id:
+       __GET_CPUID(%o0)
        retl
         nop
+#endif
index 3e48af2769d4801065e2176f90b57508edac2d18..d069a6feb535a3b74a171c1bf67264b1fd8b4955 100644 (file)
@@ -39,6 +39,7 @@
 #include <asm/cache.h>
 #include <asm/cpudata.h>
 #include <asm/auxio.h>
+#include <asm/head.h>
 
 #ifdef CONFIG_SMP
 static void distribute_irqs(void);
@@ -153,7 +154,8 @@ void enable_irq(unsigned int irq)
                unsigned long ver;
 
                __asm__ ("rdpr %%ver, %0" : "=r" (ver));
-               if ((ver >> 32) == 0x003e0016) {
+               if ((ver >> 32) == __JALAPENO_ID ||
+                   (ver >> 32) == __SERRANO_ID) {
                        /* We set it to our JBUS ID. */
                        __asm__ __volatile__("ldxa [%%g0] %1, %0"
                                             : "=r" (tid)
index 59a70301a6cf65aeebc0195191595064e5d95bea..f751d11926bc39f57ce8552b8c11b9869a5a58f7 100644 (file)
@@ -490,6 +490,58 @@ void register_prom_callbacks(void)
                   "' linux-.soft2 to .soft2");
 }
 
+static void __init per_cpu_patch(void)
+{
+#ifdef CONFIG_SMP
+       struct cpuid_patch_entry *p;
+       unsigned long ver;
+       int is_jbus;
+
+       if (tlb_type == spitfire && !this_is_starfire)
+               return;
+
+       __asm__ ("rdpr %%ver, %0" : "=r" (ver));
+       is_jbus = ((ver >> 32) == __JALAPENO_ID ||
+                  (ver >> 32) == __SERRANO_ID);
+
+       p = &__cpuid_patch;
+       while (p < &__cpuid_patch_end) {
+               unsigned long addr = p->addr;
+               unsigned int *insns;
+
+               switch (tlb_type) {
+               case spitfire:
+                       insns = &p->starfire[0];
+                       break;
+               case cheetah:
+               case cheetah_plus:
+                       if (is_jbus)
+                               insns = &p->cheetah_jbus[0];
+                       else
+                               insns = &p->cheetah_safari[0];
+                       break;
+               default:
+                       prom_printf("Unknown cpu type, halting.\n");
+                       prom_halt();
+               };
+
+               *(unsigned int *) (addr +  0) = insns[0];
+               __asm__ __volatile__("flush     %0" : : "r" (addr +  0));
+
+               *(unsigned int *) (addr +  4) = insns[1];
+               __asm__ __volatile__("flush     %0" : : "r" (addr +  4));
+
+               *(unsigned int *) (addr +  8) = insns[2];
+               __asm__ __volatile__("flush     %0" : : "r" (addr +  8));
+
+               *(unsigned int *) (addr + 12) = insns[3];
+               __asm__ __volatile__("flush     %0" : : "r" (addr + 12));
+
+               p++;
+       }
+#endif
+}
+
 void __init setup_arch(char **cmdline_p)
 {
        /* Initialize PROM console and command line. */
@@ -507,8 +559,8 @@ void __init setup_arch(char **cmdline_p)
        /* Work out if we are starfire early on */
        check_if_starfire();
 
-       /* Now we know enough to patch the __get_cpu_id()
-        * trampoline used by trap code.
+       /* Now we know enough to patch the get_cpuid sequences
+        * used by trap code.
         */
        per_cpu_patch();
 
index 0e7552546d3658c71420067ee53a8cdcd2af869d..16b8eca9754e71dbb89e7713be2e521bd0d16e26 100644 (file)
@@ -424,7 +424,7 @@ static __inline__ void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, c
 static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
 {
        u64 pstate, ver;
-       int nack_busy_id, is_jalapeno;
+       int nack_busy_id, is_jbus;
 
        if (cpus_empty(mask))
                return;
@@ -434,7 +434,8 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas
         * derivative processor.
         */
        __asm__ ("rdpr %%ver, %0" : "=r" (ver));
-       is_jalapeno = ((ver >> 32) == 0x003e0016);
+       is_jbus = ((ver >> 32) == __JALAPENO_ID ||
+                  (ver >> 32) == __SERRANO_ID);
 
        __asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
 
@@ -459,7 +460,7 @@ retry:
                for_each_cpu_mask(i, mask) {
                        u64 target = (i << 14) | 0x70;
 
-                       if (!is_jalapeno)
+                       if (!is_jbus)
                                target |= (nack_busy_id << 24);
                        __asm__ __volatile__(
                                "stxa   %%g0, [%0] %1\n\t"
@@ -512,7 +513,7 @@ retry:
                        for_each_cpu_mask(i, mask) {
                                u64 check_mask;
 
-                               if (is_jalapeno)
+                               if (is_jbus)
                                        check_mask = (0x2UL << (2*i));
                                else
                                        check_mask = (0x2UL <<
index 7e52e89726684c2a46c9d2ef1bbbfd00a116ddd3..1c4744c047abcc3697444b7b5c7aca24ee8c09e2 100644 (file)
@@ -38,6 +38,7 @@
 #include <asm/processor.h>
 #include <asm/timer.h>
 #include <asm/kdebug.h>
+#include <asm/head.h>
 #ifdef CONFIG_KMOD
 #include <linux/kmod.h>
 #endif
@@ -788,7 +789,8 @@ void __init cheetah_ecache_flush_init(void)
                cheetah_error_log[i].afsr = CHAFSR_INVALID;
 
        __asm__ ("rdpr %%ver, %0" : "=r" (ver));
-       if ((ver >> 32) == 0x003e0016) {
+       if ((ver >> 32) == __JALAPENO_ID ||
+           (ver >> 32) == __SERRANO_ID) {
                cheetah_error_table = &__jalapeno_error_table[0];
                cheetah_afsr_errors = JPAFSR_ERRORS;
        } else if ((ver >> 32) == 0x003e0015) {
index 71b943f1c9b1c22a26901d22439dd421a7abda0b..1639d9c935c3cc20a57ea026699a372a818f694c 100644 (file)
@@ -74,6 +74,9 @@ SECTIONS
   __tsb_phys_patch = .;
   .tsb_phys_patch : { *(.tsb_phys_patch) }
   __tsb_phys_patch_end = .;
+  __cpuid_patch = .;
+  .cpuid_patch : { *(.cpuid_patch) }
+  __cpuid_patch_end = .;
   . = ALIGN(8192); 
   __initramfs_start = .;
   .init.ramfs : { *(.init.ramfs) }
index f83768883e983ec67ac6d6394fca4d90560d789f..da54b4f35403df0c54f60d542f6e990207a8bc75 100644 (file)
@@ -60,9 +60,18 @@ struct trap_per_cpu {
 } __attribute__((aligned(64)));
 extern struct trap_per_cpu trap_block[NR_CPUS];
 extern void init_cur_cpu_trap(void);
-extern void per_cpu_patch(void);
 extern void setup_tba(void);
 
+#ifdef CONFIG_SMP
+struct cpuid_patch_entry {
+       unsigned int    addr;
+       unsigned int    cheetah_safari[4];
+       unsigned int    cheetah_jbus[4];
+       unsigned int    starfire[4];
+};
+extern struct cpuid_patch_entry __cpuid_patch, __cpuid_patch_end;
+#endif
+
 #endif /* !(__ASSEMBLY__) */
 
 #define TRAP_PER_CPU_THREAD    0x00
@@ -70,35 +79,58 @@ extern void setup_tba(void);
 
 #define TRAP_BLOCK_SZ_SHIFT    6
 
-/* Clobbers %g1, loads %g6 with local processor's cpuid */
-#define __GET_CPUID                    \
-       ba,pt   %xcc, __get_cpu_id;     \
-        rd     %pc, %g1;
+#ifdef CONFIG_SMP
+
+#define __GET_CPUID(REG)                               \
+       /* Spitfire implementation (default). */        \
+661:   ldxa            [%g0] ASI_UPA_CONFIG, REG;      \
+       srlx            REG, 17, REG;                   \
+        and            REG, 0x1f, REG;                 \
+       nop;                                            \
+       .section        .cpuid_patch, "ax";             \
+       /* Instruction location. */                     \
+       .word           661b;                           \
+       /* Cheetah Safari implementation. */            \
+       ldxa            [%g0] ASI_SAFARI_CONFIG, REG;   \
+       srlx            REG, 17, REG;                   \
+       and             REG, 0x3ff, REG;                \
+       nop;                                            \
+       /* Cheetah JBUS implementation. */              \
+       ldxa            [%g0] ASI_JBUS_CONFIG, REG;     \
+       srlx            REG, 17, REG;                   \
+       and             REG, 0x1f, REG;                 \
+       nop;                                            \
+       /* Starfire implementation. */                  \
+       sethi           %hi(0x1fff40000d0 >> 9), REG;   \
+       sllx            REG, 9, REG;                    \
+       or              REG, 0xd0, REG;                 \
+       lduwa           [REG] ASI_PHYS_BYPASS_EC_E, REG;\
+       .previous;
 
 /* Clobbers %g1, current address space PGD phys address into %g7.  */
 #define TRAP_LOAD_PGD_PHYS                     \
-       __GET_CPUID                             \
-       sllx    %g6, TRAP_BLOCK_SZ_SHIFT, %g6;  \
+       __GET_CPUID(%g1)                        \
        sethi   %hi(trap_block), %g7;           \
+       sllx    %g1, TRAP_BLOCK_SZ_SHIFT, %g1;  \
        or      %g7, %lo(trap_block), %g7;      \
-       add     %g7, %g6, %g7;                  \
+       add     %g7, %g1, %g7;                  \
        ldx     [%g7 + TRAP_PER_CPU_PGD_PADDR], %g7;
 
 /* Clobbers %g1, loads local processor's IRQ work area into %g6.  */
 #define TRAP_LOAD_IRQ_WORK                     \
-       __GET_CPUID                             \
-       sethi   %hi(__irq_work), %g1;           \
-       sllx    %g6, 6, %g6;                    \
-       or      %g1, %lo(__irq_work), %g1;      \
-       add     %g1, %g6, %g6;
+       __GET_CPUID(%g1)                        \
+       sethi   %hi(__irq_work), %g6;           \
+       sllx    %g1, 6, %g1;                    \
+       or      %g6, %lo(__irq_work), %g6;      \
+       add     %g6, %g1, %g6;
 
 /* Clobbers %g1, loads %g6 with current thread info pointer.  */
 #define TRAP_LOAD_THREAD_REG                   \
-       __GET_CPUID                             \
-       sllx    %g6, TRAP_BLOCK_SZ_SHIFT, %g6;  \
-       sethi   %hi(trap_block), %g1;           \
-       or      %g1, %lo(trap_block), %g1;      \
-       ldx     [%g1 + %g6], %g6;
+       __GET_CPUID(%g1)                        \
+       sethi   %hi(trap_block), %g6;           \
+       sllx    %g1, TRAP_BLOCK_SZ_SHIFT, %g1;  \
+       or      %g6, %lo(trap_block), %g6;      \
+       ldx     [%g6 + %g1], %g6;
 
 /* Given the current thread info pointer in %g6, load the per-cpu
  * area base of the current processor into %g5.  REG1, REG2, and REG3 are
@@ -109,7 +141,6 @@ extern void setup_tba(void);
  * trap will load the fully resolved %g5 per-cpu base.  This can corrupt
  * the calculations done by the macro mid-stream.
  */
-#ifdef CONFIG_SMP
 #define LOAD_PER_CPU_BASE(REG1, REG2, REG3)            \
        ldub    [%g6 + TI_CPU], REG1;                   \
        sethi   %hi(__per_cpu_shift), REG3;             \
@@ -118,8 +149,26 @@ extern void setup_tba(void);
        ldx     [REG2 + %lo(__per_cpu_base)], REG2;     \
        sllx    REG1, REG3, REG3;                       \
        add     REG3, REG2, %g5;
+
 #else
+
+/* Uniprocessor versions, we know the cpuid is zero.  */
+#define TRAP_LOAD_PGD_PHYS                     \
+       sethi   %hi(trap_block), %g7;           \
+       or      %g7, %lo(trap_block), %g7;      \
+       ldx     [%g7 + TRAP_PER_CPU_PGD_PADDR], %g7;
+
+#define TRAP_LOAD_IRQ_WORK                     \
+       sethi   %hi(__irq_work), %g6;           \
+       or      %g6, %lo(__irq_work), %g6;
+
+#define TRAP_LOAD_THREAD_REG                   \
+       sethi   %hi(trap_block), %g6;           \
+       ldx     [%g6 + %lo(trap_block)], %g6;
+
+/* No per-cpu areas on uniprocessor, so no need to load %g5.  */
 #define LOAD_PER_CPU_BASE(REG1, REG2, REG3)
-#endif
+
+#endif /* !(CONFIG_SMP) */
 
 #endif /* _SPARC64_CPUDATA_H */
index 0abd3a674e8f1abcd178f5f3fe9260efcc2a04ce..731c842f3d11effd08993dbc0e119c31de33acd1 100644 (file)
@@ -10,6 +10,7 @@
 
 #define __CHEETAH_ID   0x003e0014
 #define __JALAPENO_ID  0x003e0016
+#define __SERRANO_ID   0x003e0022
 
 #define CHEETAH_MANUF          0x003e
 #define CHEETAH_IMPL           0x0014 /* Ultra-III   */
index 473edb2603ecd46d0c14186fdacb4b544a5baba2..ad1d35a7d13f0e344dbf5ddfb3be465e0c29fa2d 100644 (file)
@@ -37,33 +37,7 @@ extern cpumask_t phys_cpu_present_map;
  *     General functions that each host system must provide.
  */
 
-static __inline__ int hard_smp_processor_id(void)
-{
-       if (tlb_type == cheetah || tlb_type == cheetah_plus) {
-               unsigned long cfg, ver;
-               __asm__ __volatile__("rdpr %%ver, %0" : "=r" (ver));
-               if ((ver >> 32) == 0x003e0016) {
-                       __asm__ __volatile__("ldxa [%%g0] %1, %0"
-                                            : "=r" (cfg)
-                                            : "i" (ASI_JBUS_CONFIG));
-                       return ((cfg >> 17) & 0x1f);
-               } else {
-                       __asm__ __volatile__("ldxa [%%g0] %1, %0"
-                                            : "=r" (cfg)
-                                            : "i" (ASI_SAFARI_CONFIG));
-                       return ((cfg >> 17) & 0x3ff);
-               }
-       } else if (this_is_starfire != 0) {
-               return starfire_hard_smp_processor_id();
-       } else {
-               unsigned long upaconfig;
-               __asm__ __volatile__("ldxa      [%%g0] %1, %0"
-                                    : "=r" (upaconfig)
-                                    : "i" (ASI_UPA_CONFIG));
-               return ((upaconfig >> 17) & 0x1f);
-       }
-}
-
+extern int hard_smp_processor_id(void);
 #define raw_smp_processor_id() (current_thread_info()->cpu)
 
 extern void smp_setup_cpu_possible_map(void);