powerpc/64s/hash: Use POWER9 SLBIA IH=3 variant in switch_slb
author Nicholas Piggin <npiggin@gmail.com>
Fri, 14 Sep 2018 15:30:50 +0000 (01:30 +1000)
committer Michael Ellerman <mpe@ellerman.id.au>
Wed, 19 Sep 2018 11:59:44 +0000 (21:59 +1000)
POWER9 introduces SLBIA IH=3, which invalidates all SLB entries and
associated lookaside information that have a class value of 1, which
Linux assigns to user addresses. This matches what switch_slb wants,
and allows a simple fast implementation that avoids the slb_cache
complexity.

As a side-effect, the POWER5 < DD2.1 SLB invalidation workaround is
also avoided on POWER9.

Process context switching rate is improved by about 2.2% for a small
process that hits the slb cache, which is the best case for the current
code.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
arch/powerpc/mm/slb.c
arch/powerpc/xmon/xmon.c

index a8f27fee6a231138399b4fe2678669e72dea9a01..513c6596140ddbff4778fb5ad16deb02d541db80 100644 (file)
@@ -279,7 +279,6 @@ static inline int esids_match(unsigned long addr1, unsigned long addr2)
 /* Flush all user entries from the segment table of the current processor. */
 void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 {
-       unsigned long offset;
        unsigned long pc = KSTK_EIP(tsk);
        unsigned long stack = KSTK_ESP(tsk);
        unsigned long exec_base;
@@ -291,45 +290,56 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
         * which would update the slb_cache/slb_cache_ptr fields in the PACA.
         */
        hard_irq_disable();
-       offset = get_paca()->slb_cache_ptr;
-       if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) &&
-           offset <= SLB_CACHE_ENTRIES) {
-               unsigned long slbie_data = 0;
-               int i;
-
-               asm volatile("isync" : : : "memory");
-               for (i = 0; i < offset; i++) {
-                       slbie_data = (unsigned long)get_paca()->slb_cache[i]
-                               << SID_SHIFT; /* EA */
-                       slbie_data |= user_segment_size(slbie_data)
-                               << SLBIE_SSIZE_SHIFT;
-                       slbie_data |= SLBIE_C; /* C set for user addresses */
-                       asm volatile("slbie %0" : : "r" (slbie_data));
+       if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+               /*
+                * SLBIA IH=3 invalidates all Class=1 SLBEs and their
+                * associated lookaside structures, which matches what
+                * switch_slb wants. So ARCH_300 does not use the slb
+                * cache.
+                */
+               asm volatile("isync ; " PPC_SLBIA(3)" ; isync");
+       } else {
+               unsigned long offset = get_paca()->slb_cache_ptr;
+
+               if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) &&
+                   offset <= SLB_CACHE_ENTRIES) {
+                       unsigned long slbie_data = 0;
+                       int i;
+
+                       asm volatile("isync" : : : "memory");
+                       for (i = 0; i < offset; i++) {
+                               /* EA */
+                               slbie_data = (unsigned long)
+                                       get_paca()->slb_cache[i] << SID_SHIFT;
+                               slbie_data |= user_segment_size(slbie_data)
+                                               << SLBIE_SSIZE_SHIFT;
+                               slbie_data |= SLBIE_C; /* user slbs have C=1 */
+                               asm volatile("slbie %0" : : "r" (slbie_data));
+                       }
+
+                       /* Workaround POWER5 < DD2.1 issue */
+                       if (!cpu_has_feature(CPU_FTR_ARCH_207S) && offset == 1)
+                               asm volatile("slbie %0" : : "r" (slbie_data));
+
+                       asm volatile("isync" : : : "memory");
+               } else {
+                       struct slb_shadow *p = get_slb_shadow();
+                       unsigned long ksp_esid_data =
+                               be64_to_cpu(p->save_area[KSTACK_INDEX].esid);
+                       unsigned long ksp_vsid_data =
+                               be64_to_cpu(p->save_area[KSTACK_INDEX].vsid);
+
+                       asm volatile("isync\n"
+                                    PPC_SLBIA(1) "\n"
+                                    "slbmte    %0,%1\n"
+                                    "isync"
+                                    :: "r"(ksp_vsid_data),
+                                       "r"(ksp_esid_data));
                }
 
-               /* Workaround POWER5 < DD2.1 issue */
-               if (!cpu_has_feature(CPU_FTR_ARCH_207S) && offset == 1)
-                       asm volatile("slbie %0" : : "r" (slbie_data));
-
-               asm volatile("isync" : : : "memory");
-       } else {
-               struct slb_shadow *p = get_slb_shadow();
-               unsigned long ksp_esid_data =
-                       be64_to_cpu(p->save_area[KSTACK_INDEX].esid);
-               unsigned long ksp_vsid_data =
-                       be64_to_cpu(p->save_area[KSTACK_INDEX].vsid);
-
-               asm volatile("isync\n"
-                            PPC_SLBIA(1) "\n"
-                            "slbmte    %0,%1\n"
-                            "isync"
-                            :: "r"(ksp_vsid_data),
-                               "r"(ksp_esid_data));
-
-               asm volatile("isync" : : : "memory");
+               get_paca()->slb_cache_ptr = 0;
        }
 
-       get_paca()->slb_cache_ptr = 0;
        copy_mm_to_paca(mm);
 
        /*
@@ -455,6 +465,9 @@ static void insert_slb_entry(unsigned long vsid, unsigned long ea,
        enum slb_index index;
        int slb_cache_index;
 
+       if (cpu_has_feature(CPU_FTR_ARCH_300))
+               return; /* ISAv3.0B and later does not use slb_cache */
+
        /*
         * We are irq disabled, hence should be safe to access PACA.
         */
index 4264aedc7775a70b4df3af92378fb7c23b3712e8..cd43c168dc1bba8ad933f6332c9d3a9009edb999 100644 (file)
@@ -2393,10 +2393,13 @@ static void dump_one_paca(int cpu)
                }
        }
        DUMP(p, vmalloc_sllp, "%#-*x");
-       DUMP(p, slb_cache_ptr, "%#-*x");
-       for (i = 0; i < SLB_CACHE_ENTRIES; i++)
-               printf(" %-*s[%d] = 0x%016x\n",
-                      22, "slb_cache", i, p->slb_cache[i]);
+
+       if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) {
+               DUMP(p, slb_cache_ptr, "%#-*x");
+               for (i = 0; i < SLB_CACHE_ENTRIES; i++)
+                       printf(" %-*s[%d] = 0x%016x\n",
+                              22, "slb_cache", i, p->slb_cache[i]);
+       }
 
        DUMP(p, rfi_flush_fallback_area, "%-*px");
 #endif