Revert "convert SLB miss handlers to C" and subsequent commits
author Michael Ellerman <mpe@ellerman.id.au>
Tue, 2 Oct 2018 13:56:39 +0000 (23:56 +1000)
committer Michael Ellerman <mpe@ellerman.id.au>
Wed, 3 Oct 2018 05:32:49 +0000 (15:32 +1000)
This reverts commits:
  5e46e29e6a97 ("powerpc/64s/hash: convert SLB miss handlers to C")
  8fed04d0f6ae ("powerpc/64s/hash: remove user SLB data from the paca")
  655deecf67b2 ("powerpc/64s/hash: SLB allocation status bitmaps")
  2e1626744e8d ("powerpc/64s/hash: provide arch_setup_exec hooks for hash slice setup")
  89ca4e126a3f ("powerpc/64s/hash: Add a SLB preload cache")

This series had a few bugs, and the fixes are not all trivial. So
revert most of it for now.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
19 files changed:
arch/powerpc/include/asm/asm-prototypes.h
arch/powerpc/include/asm/book3s/64/mmu-hash.h
arch/powerpc/include/asm/exception-64s.h
arch/powerpc/include/asm/paca.h
arch/powerpc/include/asm/processor.h
arch/powerpc/include/asm/slice.h
arch/powerpc/include/asm/thread_info.h
arch/powerpc/kernel/asm-offsets.c
arch/powerpc/kernel/exceptions-64s.S
arch/powerpc/kernel/paca.c
arch/powerpc/kernel/process.c
arch/powerpc/mm/Makefile
arch/powerpc/mm/hash_utils_64.c
arch/powerpc/mm/mmu_context.c
arch/powerpc/mm/mmu_context_book3s64.c
arch/powerpc/mm/slb.c
arch/powerpc/mm/slb_low.S [new file with mode: 0644]
arch/powerpc/mm/slice.c
arch/powerpc/xmon/xmon.c

index 78ed3c3f879a23147d59e117b92127b001e2a500..1f4691ce412618d42d51e5fa5206d23c9c8242cf 100644 (file)
@@ -78,8 +78,6 @@ void kernel_bad_stack(struct pt_regs *regs);
 void system_reset_exception(struct pt_regs *regs);
 void machine_check_exception(struct pt_regs *regs);
 void emulation_assist_interrupt(struct pt_regs *regs);
-long do_slb_fault(struct pt_regs *regs, unsigned long ea);
-void do_bad_slb_fault(struct pt_regs *regs, unsigned long ea, long err);
 
 /* signals, syscalls and interrupts */
 long sys_swapcontext(struct ucontext __user *old_ctx,
index bbeaf6adf93c9c2c6c58a68852960ec7ab9b9514..e0e4ce8f77d6d8940b9cbe96e783478e72aa9ce9 100644 (file)
@@ -487,8 +487,6 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend,
 extern void pseries_add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages);
 extern void demote_segment_4k(struct mm_struct *mm, unsigned long addr);
 
-extern void hash__setup_new_exec(void);
-
 #ifdef CONFIG_PPC_PSERIES
 void hpte_init_pseries(void);
 #else
@@ -503,7 +501,6 @@ struct slb_entry {
 };
 
 extern void slb_initialize(void);
-extern void core_flush_all_slbs(struct mm_struct *mm);
 extern void slb_flush_and_rebolt(void);
 void slb_flush_all_realmode(void);
 void __slb_restore_bolted_realmode(void);
index 47578b79f0fbad2daf4da8507f71250e158576b5..a86feddddad0cdceb012e2249c229fa7f6e31ff2 100644 (file)
  */
 #define MAX_MCE_DEPTH  4
 
+/*
+ * EX_LR is only used in EXSLB and where it does not overlap with EX_DAR
+ * EX_CCR similarly with DSISR, but being 4 byte registers there is a hole
+ * in the save area so it's not necessary to overlap them. Could be used
+ * for future savings though if another 4 byte register was to be saved.
+ */
+#define EX_LR          EX_DAR
+
 /*
  * EX_R3 is only used by the bad_stack handler. bad_stack reloads and
  * saves DAR from SPRN_DAR, and EX_DAR is not used. So EX_R3 can overlap
index 6d6b3706232c74d222c63f6f95c52bfef646835f..7b6e23af38082fa7892befdc2f39371b1cbfc36d 100644 (file)
@@ -113,10 +113,7 @@ struct paca_struct {
                                 * on the linear mapping */
        /* SLB related definitions */
        u16 vmalloc_sllp;
-       u8 slb_cache_ptr;
-       u8 stab_rr;                     /* stab/slb round-robin counter */
-       u32 slb_used_bitmap;            /* Bitmaps for first 32 SLB entries. */
-       u32 slb_kern_bitmap;
+       u16 slb_cache_ptr;
        u32 slb_cache[SLB_CACHE_ENTRIES];
 #endif /* CONFIG_PPC_BOOK3S_64 */
 
@@ -146,11 +143,24 @@ struct paca_struct {
        struct tlb_core_data tcd;
 #endif /* CONFIG_PPC_BOOK3E */
 
+#ifdef CONFIG_PPC_BOOK3S
+       mm_context_id_t mm_ctx_id;
+#ifdef CONFIG_PPC_MM_SLICES
+       unsigned char mm_ctx_low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
+       unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE];
+       unsigned long mm_ctx_slb_addr_limit;
+#else
+       u16 mm_ctx_user_psize;
+       u16 mm_ctx_sllp;
+#endif
+#endif
+
        /*
         * then miscellaneous read-write fields
         */
        struct task_struct *__current;  /* Pointer to current */
        u64 kstack;                     /* Saved Kernel stack addr */
+       u64 stab_rr;                    /* stab/slb round-robin counter */
        u64 saved_r1;                   /* r1 save for RTAS calls or PM or EE=0 */
        u64 saved_msr;                  /* MSR saved here by enter_rtas */
        u16 trap_save;                  /* Used when bad stack is encountered */
@@ -248,6 +258,7 @@ struct paca_struct {
 #endif /* CONFIG_PPC_BOOK3S_64 */
 } ____cacheline_aligned;
 
+extern void copy_mm_to_paca(struct mm_struct *mm);
 extern struct paca_struct **paca_ptrs;
 extern void initialise_paca(struct paca_struct *new_paca, int cpu);
 extern void setup_paca(struct paca_struct *new_paca);
index 350c584ca1799461bc25b681cf063f49d3f52a37..52fadded5c1ef653f2448764bf0a3a79295cdeaa 100644 (file)
@@ -273,7 +273,6 @@ struct thread_struct {
 #endif /* CONFIG_HAVE_HW_BREAKPOINT */
        struct arch_hw_breakpoint hw_brk; /* info on the hardware breakpoint */
        unsigned long   trap_nr;        /* last trap # on this thread */
-       u8 load_slb;                    /* Ages out SLB preload cache entries */
        u8 load_fp;
 #ifdef CONFIG_ALTIVEC
        u8 load_vec;
index a595461c9cb03b70f5f54b511ce13e7063a867f4..e40406cf5628eafdedd045f81486a4855ee82751 100644 (file)
@@ -32,7 +32,6 @@ void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
                           unsigned long len, unsigned int psize);
 
 void slice_init_new_context_exec(struct mm_struct *mm);
-void slice_setup_new_exec(void);
 
 #endif /* __ASSEMBLY__ */
 
index 406eb952b808015b8246633064244431ec864be7..3c0002044bc9e6237e0c5c7a18ca922e315d7231 100644 (file)
@@ -29,7 +29,6 @@
 #include <asm/page.h>
 #include <asm/accounting.h>
 
-#define SLB_PRELOAD_NR 16U
 /*
  * low level task data.
  */
@@ -45,10 +44,6 @@ struct thread_info {
 #if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC32)
        struct cpu_accounting_data accounting;
 #endif
-       u8 slb_preload_nr;
-       u8 slb_preload_tail;
-       u32 slb_preload_esid[SLB_PRELOAD_NR];
-
        /* low level flags - has atomic operations done on it */
        unsigned long   flags ____cacheline_aligned_in_smp;
 };
@@ -77,12 +72,6 @@ static inline struct thread_info *current_thread_info(void)
 }
 
 extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
-
-#ifdef CONFIG_PPC_BOOK3S_64
-void arch_setup_new_exec(void);
-#define arch_setup_new_exec arch_setup_new_exec
-#endif
-
 #endif /* __ASSEMBLY__ */
 
 /*
index ba9d0fc987305a499fad8608deea3d266ec8c8e7..89cf15566c4e80ba4e81d500e1d86f204e1241b2 100644 (file)
@@ -173,6 +173,7 @@ int main(void)
        OFFSET(PACAKSAVE, paca_struct, kstack);
        OFFSET(PACACURRENT, paca_struct, __current);
        OFFSET(PACASAVEDMSR, paca_struct, saved_msr);
+       OFFSET(PACASTABRR, paca_struct, stab_rr);
        OFFSET(PACAR1, paca_struct, saved_r1);
        OFFSET(PACATOC, paca_struct, kernel_toc);
        OFFSET(PACAKBASE, paca_struct, kernelbase);
@@ -180,6 +181,15 @@ int main(void)
        OFFSET(PACAIRQSOFTMASK, paca_struct, irq_soft_mask);
        OFFSET(PACAIRQHAPPENED, paca_struct, irq_happened);
        OFFSET(PACA_FTRACE_ENABLED, paca_struct, ftrace_enabled);
+#ifdef CONFIG_PPC_BOOK3S
+       OFFSET(PACACONTEXTID, paca_struct, mm_ctx_id);
+#ifdef CONFIG_PPC_MM_SLICES
+       OFFSET(PACALOWSLICESPSIZE, paca_struct, mm_ctx_low_slices_psize);
+       OFFSET(PACAHIGHSLICEPSIZE, paca_struct, mm_ctx_high_slices_psize);
+       OFFSET(PACA_SLB_ADDR_LIMIT, paca_struct, mm_ctx_slb_addr_limit);
+       DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def));
+#endif /* CONFIG_PPC_MM_SLICES */
+#endif
 
 #ifdef CONFIG_PPC_BOOK3E
        OFFSET(PACAPGD, paca_struct, pgd);
@@ -202,7 +212,6 @@ int main(void)
 #ifdef CONFIG_PPC_BOOK3S_64
        OFFSET(PACASLBCACHE, paca_struct, slb_cache);
        OFFSET(PACASLBCACHEPTR, paca_struct, slb_cache_ptr);
-       OFFSET(PACASTABRR, paca_struct, stab_rr);
        OFFSET(PACAVMALLOCSLLP, paca_struct, vmalloc_sllp);
 #ifdef CONFIG_PPC_MM_SLICES
        OFFSET(MMUPSIZESLLP, mmu_psize_def, sllp);
index 786f4fa5100ae7c09296e13b10cfda3f257c9f3e..301a6a86a20fc5c13d36e37114101511b2384d7e 100644 (file)
@@ -596,36 +596,28 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 
 
 EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80)
-EXCEPTION_PROLOG(PACA_EXSLB, data_access_slb_common, EXC_STD, KVMTEST_PR, 0x380);
+       SET_SCRATCH0(r13)
+       EXCEPTION_PROLOG_0(PACA_EXSLB)
+       EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x380)
+       mr      r12,r3  /* save r3 */
+       mfspr   r3,SPRN_DAR
+       mfspr   r11,SPRN_SRR1
+       crset   4*cr6+eq
+       BRANCH_TO_COMMON(r10, slb_miss_common)
 EXC_REAL_END(data_access_slb, 0x380, 0x80)
 
 EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80)
-EXCEPTION_RELON_PROLOG(PACA_EXSLB, data_access_slb_common, EXC_STD, NOTEST, 0x380);
+       SET_SCRATCH0(r13)
+       EXCEPTION_PROLOG_0(PACA_EXSLB)
+       EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x380)
+       mr      r12,r3  /* save r3 */
+       mfspr   r3,SPRN_DAR
+       mfspr   r11,SPRN_SRR1
+       crset   4*cr6+eq
+       BRANCH_TO_COMMON(r10, slb_miss_common)
 EXC_VIRT_END(data_access_slb, 0x4380, 0x80)
-
 TRAMP_KVM_SKIP(PACA_EXSLB, 0x380)
 
-EXC_COMMON_BEGIN(data_access_slb_common)
-       mfspr   r10,SPRN_DAR
-       std     r10,PACA_EXSLB+EX_DAR(r13)
-       EXCEPTION_PROLOG_COMMON(0x380, PACA_EXSLB)
-       ld      r4,PACA_EXSLB+EX_DAR(r13)
-       std     r4,_DAR(r1)
-       addi    r3,r1,STACK_FRAME_OVERHEAD
-       bl      do_slb_fault
-       cmpdi   r3,0
-       bne-    1f
-       b       fast_exception_return
-1:     /* Error case */
-       std     r3,RESULT(r1)
-       bl      save_nvgprs
-       RECONCILE_IRQ_STATE(r10, r11)
-       ld      r4,_DAR(r1)
-       ld      r5,RESULT(r1)
-       addi    r3,r1,STACK_FRAME_OVERHEAD
-       bl      do_bad_slb_fault
-       b       ret_from_except
-
 
 EXC_REAL(instruction_access, 0x400, 0x80)
 EXC_VIRT(instruction_access, 0x4400, 0x80, 0x400)
@@ -648,34 +640,160 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 
 
 EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80)
-EXCEPTION_PROLOG(PACA_EXSLB, instruction_access_slb_common, EXC_STD, KVMTEST_PR, 0x480);
+       SET_SCRATCH0(r13)
+       EXCEPTION_PROLOG_0(PACA_EXSLB)
+       EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480)
+       mr      r12,r3  /* save r3 */
+       mfspr   r3,SPRN_SRR0            /* SRR0 is faulting address */
+       mfspr   r11,SPRN_SRR1
+       crclr   4*cr6+eq
+       BRANCH_TO_COMMON(r10, slb_miss_common)
 EXC_REAL_END(instruction_access_slb, 0x480, 0x80)
 
 EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80)
-EXCEPTION_RELON_PROLOG(PACA_EXSLB, instruction_access_slb_common, EXC_STD, NOTEST, 0x480);
+       SET_SCRATCH0(r13)
+       EXCEPTION_PROLOG_0(PACA_EXSLB)
+       EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x480)
+       mr      r12,r3  /* save r3 */
+       mfspr   r3,SPRN_SRR0            /* SRR0 is faulting address */
+       mfspr   r11,SPRN_SRR1
+       crclr   4*cr6+eq
+       BRANCH_TO_COMMON(r10, slb_miss_common)
 EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80)
-
 TRAMP_KVM(PACA_EXSLB, 0x480)
 
-EXC_COMMON_BEGIN(instruction_access_slb_common)
-       EXCEPTION_PROLOG_COMMON(0x480, PACA_EXSLB)
-       ld      r4,_NIP(r1)
-       addi    r3,r1,STACK_FRAME_OVERHEAD
-       bl      do_slb_fault
-       cmpdi   r3,0
-       bne-    1f
-       b       fast_exception_return
-1:     /* Error case */
-       std     r3,RESULT(r1)
+
+/*
+ * This handler is used by the 0x380 and 0x480 SLB miss interrupts, as well as
+ * the virtual mode 0x4380 and 0x4480 interrupts if AIL is enabled.
+ */
+EXC_COMMON_BEGIN(slb_miss_common)
+       /*
+        * r13 points to the PACA, r9 contains the saved CR,
+        * r12 contains the saved r3,
+        * r11 contain the saved SRR1, SRR0 is still ready for return
+        * r3 has the faulting address
+        * r9 - r13 are saved in paca->exslb.
+        * cr6.eq is set for a D-SLB miss, clear for a I-SLB miss
+        * We assume we aren't going to take any exceptions during this
+        * procedure.
+        */
+       mflr    r10
+       stw     r9,PACA_EXSLB+EX_CCR(r13)       /* save CR in exc. frame */
+       std     r10,PACA_EXSLB+EX_LR(r13)       /* save LR */
+
+       andi.   r9,r11,MSR_PR   // Check for exception from userspace
+       cmpdi   cr4,r9,MSR_PR   // And save the result in CR4 for later
+
+       /*
+        * Test MSR_RI before calling slb_allocate_realmode, because the
+        * MSR in r11 gets clobbered. However we still want to allocate
+        * SLB in case MSR_RI=0, to minimise the risk of getting stuck in
+        * recursive SLB faults. So use cr5 for this, which is preserved.
+        */
+       andi.   r11,r11,MSR_RI  /* check for unrecoverable exception */
+       cmpdi   cr5,r11,MSR_RI
+
+       crset   4*cr0+eq
+#ifdef CONFIG_PPC_BOOK3S_64
+BEGIN_MMU_FTR_SECTION
+       bl      slb_allocate
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
+#endif
+
+       ld      r10,PACA_EXSLB+EX_LR(r13)
+       lwz     r9,PACA_EXSLB+EX_CCR(r13)       /* get saved CR */
+       mtlr    r10
+
+       /*
+        * Large address, check whether we have to allocate new contexts.
+        */
+       beq-    8f
+
+       bne-    cr5,2f          /* if unrecoverable exception, oops */
+
+       /* All done -- return from exception. */
+
+       bne     cr4,1f          /* returning to kernel */
+
+       mtcrf   0x80,r9
+       mtcrf   0x08,r9         /* MSR[PR] indication is in cr4 */
+       mtcrf   0x04,r9         /* MSR[RI] indication is in cr5 */
+       mtcrf   0x02,r9         /* I/D indication is in cr6 */
+       mtcrf   0x01,r9         /* slb_allocate uses cr0 and cr7 */
+
+       RESTORE_CTR(r9, PACA_EXSLB)
+       RESTORE_PPR_PACA(PACA_EXSLB, r9)
+       mr      r3,r12
+       ld      r9,PACA_EXSLB+EX_R9(r13)
+       ld      r10,PACA_EXSLB+EX_R10(r13)
+       ld      r11,PACA_EXSLB+EX_R11(r13)
+       ld      r12,PACA_EXSLB+EX_R12(r13)
+       ld      r13,PACA_EXSLB+EX_R13(r13)
+       RFI_TO_USER
+       b       .       /* prevent speculative execution */
+1:
+       mtcrf   0x80,r9
+       mtcrf   0x08,r9         /* MSR[PR] indication is in cr4 */
+       mtcrf   0x04,r9         /* MSR[RI] indication is in cr5 */
+       mtcrf   0x02,r9         /* I/D indication is in cr6 */
+       mtcrf   0x01,r9         /* slb_allocate uses cr0 and cr7 */
+
+       RESTORE_CTR(r9, PACA_EXSLB)
+       RESTORE_PPR_PACA(PACA_EXSLB, r9)
+       mr      r3,r12
+       ld      r9,PACA_EXSLB+EX_R9(r13)
+       ld      r10,PACA_EXSLB+EX_R10(r13)
+       ld      r11,PACA_EXSLB+EX_R11(r13)
+       ld      r12,PACA_EXSLB+EX_R12(r13)
+       ld      r13,PACA_EXSLB+EX_R13(r13)
+       RFI_TO_KERNEL
+       b       .       /* prevent speculative execution */
+
+
+2:     std     r3,PACA_EXSLB+EX_DAR(r13)
+       mr      r3,r12
+       mfspr   r11,SPRN_SRR0
+       mfspr   r12,SPRN_SRR1
+       LOAD_HANDLER(r10,unrecov_slb)
+       mtspr   SPRN_SRR0,r10
+       ld      r10,PACAKMSR(r13)
+       mtspr   SPRN_SRR1,r10
+       RFI_TO_KERNEL
+       b       .
+
+8:     std     r3,PACA_EXSLB+EX_DAR(r13)
+       mr      r3,r12
+       mfspr   r11,SPRN_SRR0
+       mfspr   r12,SPRN_SRR1
+       LOAD_HANDLER(r10, large_addr_slb)
+       mtspr   SPRN_SRR0,r10
+       ld      r10,PACAKMSR(r13)
+       mtspr   SPRN_SRR1,r10
+       RFI_TO_KERNEL
+       b       .
+
+EXC_COMMON_BEGIN(unrecov_slb)
+       EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
+       RECONCILE_IRQ_STATE(r10, r11)
        bl      save_nvgprs
+1:     addi    r3,r1,STACK_FRAME_OVERHEAD
+       bl      unrecoverable_exception
+       b       1b
+
+EXC_COMMON_BEGIN(large_addr_slb)
+       EXCEPTION_PROLOG_COMMON(0x380, PACA_EXSLB)
        RECONCILE_IRQ_STATE(r10, r11)
-       ld      r4,_NIP(r1)
-       ld      r5,RESULT(r1)
-       addi    r3,r1,STACK_FRAME_OVERHEAD
-       bl      do_bad_slb_fault
+       ld      r3, PACA_EXSLB+EX_DAR(r13)
+       std     r3, _DAR(r1)
+       beq     cr6, 2f
+       li      r10, 0x481              /* fix trap number for I-SLB miss */
+       std     r10, _TRAP(r1)
+2:     bl      save_nvgprs
+       addi    r3, r1, STACK_FRAME_OVERHEAD
+       bl      slb_miss_large_addr
        b       ret_from_except
 
-
 EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100)
        .globl hardware_interrupt_hv;
 hardware_interrupt_hv:
index 0cf84e30d1cde755113f9d5460880c9126367b02..0ee3e6d50f2885d519462af046e19560ca4ace8e 100644 (file)
@@ -258,3 +258,25 @@ void __init free_unused_pacas(void)
        printk(KERN_DEBUG "Allocated %u bytes for %u pacas\n",
                        paca_ptrs_size + paca_struct_size, nr_cpu_ids);
 }
+
+void copy_mm_to_paca(struct mm_struct *mm)
+{
+#ifdef CONFIG_PPC_BOOK3S
+       mm_context_t *context = &mm->context;
+
+       get_paca()->mm_ctx_id = context->id;
+#ifdef CONFIG_PPC_MM_SLICES
+       VM_BUG_ON(!mm->context.slb_addr_limit);
+       get_paca()->mm_ctx_slb_addr_limit = mm->context.slb_addr_limit;
+       memcpy(&get_paca()->mm_ctx_low_slices_psize,
+              &context->low_slices_psize, sizeof(context->low_slices_psize));
+       memcpy(&get_paca()->mm_ctx_high_slices_psize,
+              &context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm));
+#else /* CONFIG_PPC_MM_SLICES */
+       get_paca()->mm_ctx_user_psize = context->user_psize;
+       get_paca()->mm_ctx_sllp = context->sllp;
+#endif
+#else /* !CONFIG_PPC_BOOK3S */
+       return;
+#endif
+}
index 03c2e1f134bc2aa36ead452d4c9aaa6409ada64f..913c5725cdb2ad416d06513ed6a72240b4e9aa6d 100644 (file)
@@ -1482,15 +1482,6 @@ void flush_thread(void)
 #endif /* CONFIG_HAVE_HW_BREAKPOINT */
 }
 
-#ifdef CONFIG_PPC_BOOK3S_64
-void arch_setup_new_exec(void)
-{
-       if (radix_enabled())
-               return;
-       hash__setup_new_exec();
-}
-#endif
-
 int set_thread_uses_vas(void)
 {
 #ifdef CONFIG_PPC_BOOK3S_64
@@ -1719,8 +1710,6 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
        return 0;
 }
 
-void preload_new_slb_context(unsigned long start, unsigned long sp);
-
 /*
  * Set up a thread for executing a new program
  */
@@ -1728,10 +1717,6 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
 {
 #ifdef CONFIG_PPC64
        unsigned long load_addr = regs->gpr[2]; /* saved by ELF_PLAT_INIT */
-
-#ifdef CONFIG_PPC_BOOK3S_64
-       preload_new_slb_context(start, sp);
-#endif
 #endif
 
        /*
@@ -1822,7 +1807,6 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
 #ifdef CONFIG_VSX
        current->thread.used_vsr = 0;
 #endif
-       current->thread.load_slb = 0;
        current->thread.load_fp = 0;
        memset(&current->thread.fp_state, 0, sizeof(current->thread.fp_state));
        current->thread.fp_save_area = NULL;
index 892d4e061d6202b0dac1d37d2d2e657602b5a2cc..cdf6a996004655d51dbe452bc90af86d11335f7a 100644 (file)
@@ -15,7 +15,7 @@ obj-$(CONFIG_PPC_MMU_NOHASH)  += mmu_context_nohash.o tlb_nohash.o \
 obj-$(CONFIG_PPC_BOOK3E)       += tlb_low_$(BITS)e.o
 hash64-$(CONFIG_PPC_NATIVE)    := hash_native_64.o
 obj-$(CONFIG_PPC_BOOK3E_64)   += pgtable-book3e.o
-obj-$(CONFIG_PPC_BOOK3S_64)    += pgtable-hash64.o hash_utils_64.o slb.o $(hash64-y) mmu_context_book3s64.o pgtable-book3s64.o
+obj-$(CONFIG_PPC_BOOK3S_64)    += pgtable-hash64.o hash_utils_64.o slb_low.o slb.o $(hash64-y) mmu_context_book3s64.o pgtable-book3s64.o
 obj-$(CONFIG_PPC_RADIX_MMU)    += pgtable-radix.o tlb-radix.o
 obj-$(CONFIG_PPC_STD_MMU_32)   += ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o
 obj-$(CONFIG_PPC_STD_MMU)      += tlb_hash$(BITS).o
index 88c95dc8b141fad5182adb3b2ae7db86607877c0..f23a89d8e4ce6c8fecf0816d23b88d621c68428b 100644 (file)
@@ -1088,16 +1088,16 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
 }
 
 #ifdef CONFIG_PPC_MM_SLICES
-static unsigned int get_psize(struct mm_struct *mm, unsigned long addr)
+static unsigned int get_paca_psize(unsigned long addr)
 {
        unsigned char *psizes;
        unsigned long index, mask_index;
 
        if (addr < SLICE_LOW_TOP) {
-               psizes = mm->context.low_slices_psize;
+               psizes = get_paca()->mm_ctx_low_slices_psize;
                index = GET_LOW_SLICE_INDEX(addr);
        } else {
-               psizes = mm->context.high_slices_psize;
+               psizes = get_paca()->mm_ctx_high_slices_psize;
                index = GET_HIGH_SLICE_INDEX(addr);
        }
        mask_index = index & 0x1;
@@ -1105,9 +1105,9 @@ static unsigned int get_psize(struct mm_struct *mm, unsigned long addr)
 }
 
 #else
-unsigned int get_psize(struct mm_struct *mm, unsigned long addr)
+unsigned int get_paca_psize(unsigned long addr)
 {
-       return mm->context.user_psize;
+       return get_paca()->mm_ctx_user_psize;
 }
 #endif
 
@@ -1118,11 +1118,15 @@ unsigned int get_psize(struct mm_struct *mm, unsigned long addr)
 #ifdef CONFIG_PPC_64K_PAGES
 void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
 {
-       if (get_psize(mm, addr) == MMU_PAGE_4K)
+       if (get_slice_psize(mm, addr) == MMU_PAGE_4K)
                return;
        slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K);
        copro_flush_all_slbs(mm);
-       core_flush_all_slbs(mm);
+       if ((get_paca_psize(addr) != MMU_PAGE_4K) && (current->mm == mm)) {
+
+               copy_mm_to_paca(mm);
+               slb_flush_and_rebolt();
+       }
 }
 #endif /* CONFIG_PPC_64K_PAGES */
 
@@ -1187,6 +1191,22 @@ void hash_failure_debug(unsigned long ea, unsigned long access,
                trap, vsid, ssize, psize, lpsize, pte);
 }
 
+static void check_paca_psize(unsigned long ea, struct mm_struct *mm,
+                            int psize, bool user_region)
+{
+       if (user_region) {
+               if (psize != get_paca_psize(ea)) {
+                       copy_mm_to_paca(mm);
+                       slb_flush_and_rebolt();
+               }
+       } else if (get_paca()->vmalloc_sllp !=
+                  mmu_psize_defs[mmu_vmalloc_psize].sllp) {
+               get_paca()->vmalloc_sllp =
+                       mmu_psize_defs[mmu_vmalloc_psize].sllp;
+               slb_vmalloc_update();
+       }
+}
+
 /* Result code is:
  *  0 - handled
  *  1 - normal page fault
@@ -1219,7 +1239,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
                        rc = 1;
                        goto bail;
                }
-               psize = get_psize(mm, ea);
+               psize = get_slice_psize(mm, ea);
                ssize = user_segment_size(ea);
                vsid = get_user_vsid(&mm->context, ea, ssize);
                break;
@@ -1307,6 +1327,9 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
                        WARN_ON(1);
                }
 #endif
+               if (current->mm == mm)
+                       check_paca_psize(ea, mm, psize, user_region);
+
                goto bail;
        }
 
@@ -1341,14 +1364,15 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
                               "to 4kB pages because of "
                               "non-cacheable mapping\n");
                        psize = mmu_vmalloc_psize = MMU_PAGE_4K;
-                       slb_vmalloc_update();
                        copro_flush_all_slbs(mm);
-                       core_flush_all_slbs(mm);
                }
        }
 
 #endif /* CONFIG_PPC_64K_PAGES */
 
+       if (current->mm == mm)
+               check_paca_psize(ea, mm, psize, user_region);
+
 #ifdef CONFIG_PPC_64K_PAGES
        if (psize == MMU_PAGE_64K)
                rc = __hash_page_64K(ea, access, vsid, ptep, trap,
@@ -1436,7 +1460,7 @@ int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap,
 #ifdef CONFIG_PPC_MM_SLICES
 static bool should_hash_preload(struct mm_struct *mm, unsigned long ea)
 {
-       int psize = get_psize(mm, ea);
+       int psize = get_slice_psize(mm, ea);
 
        /* We only prefault standard pages for now */
        if (unlikely(psize != mm->context.user_psize))
index 28ae2835db3d402bb648dde400676a2ed28c6570..f84e14f23e50aa49ee76c12e81e52f91b40bb4a0 100644 (file)
@@ -54,7 +54,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
                 * MMU context id, which is then moved to SPRN_PID.
                 *
                 * For the hash MMU it is either the first load from slb_cache
-                * in switch_slb(), and/or load of MMU context id.
+                * in switch_slb(), and/or the store of paca->mm_ctx_id in
+                * copy_mm_to_paca().
                 *
                 * On the other side, the barrier is in mm/tlb-radix.c for
                 * radix which orders earlier stores to clear the PTEs vs
index 510f103d7813861cffee9d0c7e6200a1ec3afb98..dbd8f762140b6942b32f41a8466cb3bfc7aee840 100644 (file)
@@ -53,8 +53,6 @@ int hash__alloc_context_id(void)
 }
 EXPORT_SYMBOL_GPL(hash__alloc_context_id);
 
-void slb_setup_new_exec(void);
-
 static int hash__init_new_context(struct mm_struct *mm)
 {
        int index;
@@ -86,13 +84,6 @@ static int hash__init_new_context(struct mm_struct *mm)
        return index;
 }
 
-void hash__setup_new_exec(void)
-{
-       slice_setup_new_exec();
-
-       slb_setup_new_exec();
-}
-
 static int radix__init_new_context(struct mm_struct *mm)
 {
        unsigned long rts_field;
index b438220c4336b26bea35bc9853df773c16c523c6..513c6596140ddbff4778fb5ad16deb02d541db80 100644 (file)
@@ -14,7 +14,6 @@
  *      2 of the License, or (at your option) any later version.
  */
 
-#include <asm/asm-prototypes.h>
 #include <asm/pgtable.h>
 #include <asm/mmu.h>
 #include <asm/mmu_context.h>
@@ -34,7 +33,7 @@ enum slb_index {
        KSTACK_INDEX    = 1, /* Kernel stack map */
 };
 
-static long slb_allocate_user(struct mm_struct *mm, unsigned long ea);
+extern void slb_allocate(unsigned long ea);
 
 #define slb_esid_mask(ssize)   \
        (((ssize) == MMU_SEGSIZE_256M)? ESID_MASK: ESID_MASK_1T)
@@ -45,17 +44,11 @@ static inline unsigned long mk_esid_data(unsigned long ea, int ssize,
        return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | index;
 }
 
-static inline unsigned long __mk_vsid_data(unsigned long vsid, int ssize,
-                                        unsigned long flags)
-{
-       return (vsid << slb_vsid_shift(ssize)) | flags |
-               ((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT);
-}
-
 static inline unsigned long mk_vsid_data(unsigned long ea, int ssize,
                                         unsigned long flags)
 {
-       return __mk_vsid_data(get_kernel_vsid(ea, ssize), ssize, flags);
+       return (get_kernel_vsid(ea, ssize) << slb_vsid_shift(ssize)) | flags |
+               ((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT);
 }
 
 static inline void slb_shadow_update(unsigned long ea, int ssize,
@@ -122,9 +115,6 @@ void slb_restore_bolted_realmode(void)
 {
        __slb_restore_bolted_realmode();
        get_paca()->slb_cache_ptr = 0;
-
-       get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
-       get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
 }
 
 /*
@@ -132,6 +122,9 @@ void slb_restore_bolted_realmode(void)
  */
 void slb_flush_all_realmode(void)
 {
+       /*
+        * This flushes all SLB entries including 0, so it must be realmode.
+        */
        asm volatile("slbmte %0,%0; slbia" : : "r" (0));
 }
 
@@ -177,9 +170,6 @@ void slb_flush_and_rebolt(void)
                     : "memory");
 
        get_paca()->slb_cache_ptr = 0;
-
-       get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
-       get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
 }
 
 void slb_save_contents(struct slb_entry *slb_ptr)
@@ -212,7 +202,7 @@ void slb_dump_contents(struct slb_entry *slb_ptr)
                return;
 
        pr_err("SLB contents of cpu 0x%x\n", smp_processor_id());
-       pr_err("Last SLB entry inserted at slot %u\n", get_paca()->stab_rr);
+       pr_err("Last SLB entry inserted at slot %lld\n", get_paca()->stab_rr);
 
        for (i = 0; i < mmu_slb_size; i++) {
                e = slb_ptr->esid;
@@ -257,119 +247,41 @@ void slb_vmalloc_update(void)
        slb_flush_and_rebolt();
 }
 
-static bool preload_hit(struct thread_info *ti, unsigned long esid)
-{
-       u8 i;
-
-       for (i = 0; i < ti->slb_preload_nr; i++) {
-               u8 idx;
-
-               idx = (ti->slb_preload_tail + i) % SLB_PRELOAD_NR;
-               if (esid == ti->slb_preload_esid[idx])
-                       return true;
-       }
-       return false;
-}
-
-static bool preload_add(struct thread_info *ti, unsigned long ea)
-{
-       unsigned long esid;
-       u8 idx;
-
-       if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) {
-               /* EAs are stored >> 28 so 256MB segments don't need clearing */
-               if (ea & ESID_MASK_1T)
-                       ea &= ESID_MASK_1T;
-       }
-
-       esid = ea >> SID_SHIFT;
-
-       if (preload_hit(ti, esid))
-               return false;
-
-       idx = (ti->slb_preload_tail + ti->slb_preload_nr) % SLB_PRELOAD_NR;
-       ti->slb_preload_esid[idx] = esid;
-       if (ti->slb_preload_nr == SLB_PRELOAD_NR)
-               ti->slb_preload_tail = (ti->slb_preload_tail + 1) % SLB_PRELOAD_NR;
-       else
-               ti->slb_preload_nr++;
-
-       return true;
-}
-
-static void preload_age(struct thread_info *ti)
-{
-       if (!ti->slb_preload_nr)
-               return;
-       ti->slb_preload_nr--;
-       ti->slb_preload_tail = (ti->slb_preload_tail + 1) % SLB_PRELOAD_NR;
-}
-
-void slb_setup_new_exec(void)
+/* Helper function to compare esids.  There are four cases to handle.
+ * 1. The system is not 1T segment size capable.  Use the GET_ESID compare.
+ * 2. The system is 1T capable, both addresses are < 1T, use the GET_ESID compare.
+ * 3. The system is 1T capable, only one of the two addresses is > 1T.  This is not a match.
+ * 4. The system is 1T capable, both addresses are > 1T, use the GET_ESID_1T macro to compare.
+ */
+static inline int esids_match(unsigned long addr1, unsigned long addr2)
 {
-       struct thread_info *ti = current_thread_info();
-       struct mm_struct *mm = current->mm;
-       unsigned long exec = 0x10000000;
+       int esid_1t_count;
 
-       /*
-        * We have no good place to clear the slb preload cache on exec,
-        * flush_thread is about the earliest arch hook but that happens
-        * after we switch to the mm and have aleady preloaded the SLBEs.
-        *
-        * For the most part that's probably okay to use entries from the
-        * previous exec, they will age out if unused. It may turn out to
-        * be an advantage to clear the cache before switching to it,
-        * however.
-        */
-
-       /*
-        * preload some userspace segments into the SLB.
-        * Almost all 32 and 64bit PowerPC executables are linked at
-        * 0x10000000 so it makes sense to preload this segment.
-        */
-       if (!is_kernel_addr(exec)) {
-               if (preload_add(ti, exec))
-                       slb_allocate_user(mm, exec);
-       }
-
-       /* Libraries and mmaps. */
-       if (!is_kernel_addr(mm->mmap_base)) {
-               if (preload_add(ti, mm->mmap_base))
-                       slb_allocate_user(mm, mm->mmap_base);
-       }
-}
+       /* System is not 1T segment size capable. */
+       if (!mmu_has_feature(MMU_FTR_1T_SEGMENT))
+               return (GET_ESID(addr1) == GET_ESID(addr2));
 
-void preload_new_slb_context(unsigned long start, unsigned long sp)
-{
-       struct thread_info *ti = current_thread_info();
-       struct mm_struct *mm = current->mm;
-       unsigned long heap = mm->start_brk;
+       esid_1t_count = (((addr1 >> SID_SHIFT_1T) != 0) +
+                               ((addr2 >> SID_SHIFT_1T) != 0));
 
-       /* Userspace entry address. */
-       if (!is_kernel_addr(start)) {
-               if (preload_add(ti, start))
-                       slb_allocate_user(mm, start);
-       }
+       /* both addresses are < 1T */
+       if (esid_1t_count == 0)
+               return (GET_ESID(addr1) == GET_ESID(addr2));
 
-       /* Top of stack, grows down. */
-       if (!is_kernel_addr(sp)) {
-               if (preload_add(ti, sp))
-                       slb_allocate_user(mm, sp);
-       }
+       /* One address < 1T, the other > 1T.  Not a match */
+       if (esid_1t_count == 1)
+               return 0;
 
-       /* Bottom of heap, grows up. */
-       if (heap && !is_kernel_addr(heap)) {
-               if (preload_add(ti, heap))
-                       slb_allocate_user(mm, heap);
-       }
+       /* Both addresses are > 1T. */
+       return (GET_ESID_1T(addr1) == GET_ESID_1T(addr2));
 }
 
-
 /* Flush all user entries from the segment table of the current processor. */
 void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 {
-       struct thread_info *ti = task_thread_info(tsk);
-       u8 i;
+       unsigned long pc = KSTK_EIP(tsk);
+       unsigned long stack = KSTK_ESP(tsk);
+       unsigned long exec_base;
 
        /*
         * We need interrupts hard-disabled here, not just soft-disabled,
@@ -392,6 +304,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
                if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) &&
                    offset <= SLB_CACHE_ENTRIES) {
                        unsigned long slbie_data = 0;
+                       int i;
 
                        asm volatile("isync" : : : "memory");
                        for (i = 0; i < offset; i++) {
@@ -422,60 +335,67 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
                                     "isync"
                                     :: "r"(ksp_vsid_data),
                                        "r"(ksp_esid_data));
-
-                       get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
                }
 
                get_paca()->slb_cache_ptr = 0;
        }
-       get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
+
+       copy_mm_to_paca(mm);
 
        /*
-        * We gradually age out SLBs after a number of context switches to
-        * reduce reload overhead of unused entries (like we do with FP/VEC
-        * reload). Each time we wrap 256 switches, take an entry out of the
-        * SLB preload cache.
+        * preload some userspace segments into the SLB.
+        * Almost all 32 and 64bit PowerPC executables are linked at
+        * 0x10000000 so it makes sense to preload this segment.
         */
-       tsk->thread.load_slb++;
-       if (!tsk->thread.load_slb) {
-               unsigned long pc = KSTK_EIP(tsk);
+       exec_base = 0x10000000;
 
-               preload_age(ti);
-               preload_add(ti, pc);
-       }
+       if (is_kernel_addr(pc) || is_kernel_addr(stack) ||
+           is_kernel_addr(exec_base))
+               return;
 
-       for (i = 0; i < ti->slb_preload_nr; i++) {
-               unsigned long ea;
-               u8 idx;
+       slb_allocate(pc);
 
-               idx = (ti->slb_preload_tail + i) % SLB_PRELOAD_NR;
-               ea = (unsigned long)ti->slb_preload_esid[idx] << SID_SHIFT;
+       if (!esids_match(pc, stack))
+               slb_allocate(stack);
 
-               slb_allocate_user(mm, ea);
-       }
+       if (!esids_match(pc, exec_base) &&
+           !esids_match(stack, exec_base))
+               slb_allocate(exec_base);
 }
 
-void slb_set_size(u16 size)
+static inline void patch_slb_encoding(unsigned int *insn_addr,
+                                     unsigned int immed)
 {
-       mmu_slb_size = size;
+
+       /*
+        * This function patches either an li or a cmpldi instruction with
+        * a new immediate value. This relies on the fact that both li
+        * (which is actually addi) and cmpldi both take a 16-bit immediate
+        * value, and it is situated in the same location in the instruction,
+        * ie. bits 16-31 (Big endian bit order) or the lower 16 bits.
+        * The signedness of the immediate operand differs between the two
+        * instructions however this code is only ever patching a small value,
+        * much less than 1 << 15, so we can get away with it.
+        * To patch the value we read the existing instruction, clear the
+        * immediate value, and or in our new value, then write the instruction
+        * back.
+        */
+       unsigned int insn = (*insn_addr & 0xffff0000) | immed;
+       patch_instruction(insn_addr, insn);
 }
 
-static void cpu_flush_slb(void *parm)
-{
-       struct mm_struct *mm = parm;
-       unsigned long flags;
+extern u32 slb_miss_kernel_load_linear[];
+extern u32 slb_miss_kernel_load_io[];
+extern u32 slb_compare_rr_to_size[];
+extern u32 slb_miss_kernel_load_vmemmap[];
 
-       if (mm != current->active_mm)
+void slb_set_size(u16 size)
+{
+       if (mmu_slb_size == size)
                return;
 
-       local_irq_save(flags);
-       slb_flush_and_rebolt();
-       local_irq_restore(flags);
-}
-
-void core_flush_all_slbs(struct mm_struct *mm)
-{
-       on_each_cpu(cpu_flush_slb, mm, 1);
+       mmu_slb_size = size;
+       patch_slb_encoding(slb_compare_rr_to_size, mmu_slb_size);
 }
 
 void slb_initialize(void)
@@ -497,16 +417,24 @@ void slb_initialize(void)
 #endif
        if (!slb_encoding_inited) {
                slb_encoding_inited = 1;
+               patch_slb_encoding(slb_miss_kernel_load_linear,
+                                  SLB_VSID_KERNEL | linear_llp);
+               patch_slb_encoding(slb_miss_kernel_load_io,
+                                  SLB_VSID_KERNEL | io_llp);
+               patch_slb_encoding(slb_compare_rr_to_size,
+                                  mmu_slb_size);
+
                pr_devel("SLB: linear  LLP = %04lx\n", linear_llp);
                pr_devel("SLB: io      LLP = %04lx\n", io_llp);
+
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
+               patch_slb_encoding(slb_miss_kernel_load_vmemmap,
+                                  SLB_VSID_KERNEL | vmemmap_llp);
                pr_devel("SLB: vmemmap LLP = %04lx\n", vmemmap_llp);
 #endif
        }
 
        get_paca()->stab_rr = SLB_NUM_BOLTED - 1;
-       get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
-       get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
 
        lflags = SLB_VSID_KERNEL | linear_llp;
 
@@ -530,13 +458,52 @@ void slb_initialize(void)
        asm volatile("isync":::"memory");
 }
 
-static void slb_cache_update(unsigned long esid_data)
+static void insert_slb_entry(unsigned long vsid, unsigned long ea,
+                            int bpsize, int ssize)
 {
+       unsigned long flags, vsid_data, esid_data;
+       enum slb_index index;
        int slb_cache_index;
 
        if (cpu_has_feature(CPU_FTR_ARCH_300))
                return; /* ISAv3.0B and later does not use slb_cache */
 
+       /*
+        * We are irq disabled, hence should be safe to access PACA.
+        */
+       VM_WARN_ON(!irqs_disabled());
+
+       /*
+        * We can't take a PMU exception in the following code, so hard
+        * disable interrupts.
+        */
+       hard_irq_disable();
+
+       index = get_paca()->stab_rr;
+
+       /*
+        * simple round-robin replacement of slb starting at SLB_NUM_BOLTED.
+        */
+       if (index < (mmu_slb_size - 1))
+               index++;
+       else
+               index = SLB_NUM_BOLTED;
+
+       get_paca()->stab_rr = index;
+
+       flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp;
+       vsid_data = (vsid << slb_vsid_shift(ssize)) | flags |
+                   ((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT);
+       esid_data = mk_esid_data(ea, ssize, index);
+
+       /*
+        * No need for an isync before or after this slbmte. The exception
+        * we enter with and the rfid we exit with are context synchronizing.
+        * Also we only handle user segments here.
+        */
+       asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data)
+                    : "memory");
+
        /*
         * Now update slb cache entries
         */
@@ -558,196 +525,58 @@ static void slb_cache_update(unsigned long esid_data)
        }
 }
 
-static enum slb_index alloc_slb_index(bool kernel)
+static void handle_multi_context_slb_miss(int context_id, unsigned long ea)
 {
-       enum slb_index index;
-
-       /*
-        * The allocation bitmaps can become out of synch with the SLB
-        * when the _switch code does slbie when bolting a new stack
-        * segment and it must not be anywhere else in the SLB. This leaves
-        * a kernel allocated entry that is unused in the SLB. With very
-        * large systems or small segment sizes, the bitmaps could slowly
-        * fill with these entries. They will eventually be cleared out
-        * by the round robin allocator in that case, so it's probably not
-        * worth accounting for.
-        */
+       struct mm_struct *mm = current->mm;
+       unsigned long vsid;
+       int bpsize;
 
        /*
-        * SLBs beyond 32 entries are allocated with stab_rr only
-        * POWER7/8/9 have 32 SLB entries, this could be expanded if a
-        * future CPU has more.
+        * We are always above 1TB, hence use high user segment size.
         */
-       if (get_paca()->slb_used_bitmap != U32_MAX) {
-               index = ffz(get_paca()->slb_used_bitmap);
-               get_paca()->slb_used_bitmap |= 1U << index;
-               if (kernel)
-                       get_paca()->slb_kern_bitmap |= 1U << index;
-       } else {
-               /* round-robin replacement of slb starting at SLB_NUM_BOLTED. */
-               index = get_paca()->stab_rr;
-               if (index < (mmu_slb_size - 1))
-                       index++;
-               else
-                       index = SLB_NUM_BOLTED;
-               get_paca()->stab_rr = index;
-               if (index < 32) {
-                       if (kernel)
-                               get_paca()->slb_kern_bitmap |= 1U << index;
-                       else
-                               get_paca()->slb_kern_bitmap &= ~(1U << index);
-               }
-       }
-       BUG_ON(index < SLB_NUM_BOLTED);
-
-       return index;
+       vsid = get_vsid(context_id, ea, mmu_highuser_ssize);
+       bpsize = get_slice_psize(mm, ea);
+       insert_slb_entry(vsid, ea, bpsize, mmu_highuser_ssize);
 }
 
-static long slb_insert_entry(unsigned long ea, unsigned long context,
-                               unsigned long flags, int ssize, bool kernel)
+void slb_miss_large_addr(struct pt_regs *regs)
 {
-       unsigned long vsid;
-       unsigned long vsid_data, esid_data;
-       enum slb_index index;
-
-       vsid = get_vsid(context, ea, ssize);
-       if (!vsid)
-               return -EFAULT;
+       enum ctx_state prev_state = exception_enter();
+       unsigned long ea = regs->dar;
+       int context;
 
-       index = alloc_slb_index(kernel);
-
-       vsid_data = __mk_vsid_data(vsid, ssize, flags);
-       esid_data = mk_esid_data(ea, ssize, index);
+       if (REGION_ID(ea) != USER_REGION_ID)
+               goto slb_bad_addr;
 
        /*
-        * No need for an isync before or after this slbmte. The exception
-        * we enter with and the rfid we exit with are context synchronizing.
-        * Also we only handle user segments here.
+        * Are we beyond what the page table layout supports?
         */
-       asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data));
+       if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE)
+               goto slb_bad_addr;
 
-       if (!kernel)
-               slb_cache_update(esid_data);
-
-       return 0;
-}
-
-static long slb_allocate_kernel(unsigned long ea, unsigned long id)
-{
-       unsigned long context;
-       unsigned long flags;
-       int ssize;
-
-       if ((ea & ~REGION_MASK) >= (1ULL << MAX_EA_BITS_PER_CONTEXT))
-               return -EFAULT;
-
-       if (id == KERNEL_REGION_ID) {
-               flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp;
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
-       } else if (id == VMEMMAP_REGION_ID) {
-               flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmemmap_psize].sllp;
-#endif
-       } else if (id == VMALLOC_REGION_ID) {
-               if (ea < H_VMALLOC_END)
-                       flags = get_paca()->vmalloc_sllp;
-               else
-                       flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp;
-       } else {
-               return -EFAULT;
-       }
-
-       ssize = MMU_SEGSIZE_1T;
-       if (!mmu_has_feature(MMU_FTR_1T_SEGMENT))
-               ssize = MMU_SEGSIZE_256M;
-
-       context = id - KERNEL_REGION_CONTEXT_OFFSET;
-
-       return slb_insert_entry(ea, context, flags, ssize, true);
-}
-
-static long slb_allocate_user(struct mm_struct *mm, unsigned long ea)
-{
-       unsigned long context;
-       unsigned long flags;
-       int bpsize;
-       int ssize;
+       /* Lower address should have been handled by asm code */
+       if (ea < (1UL << MAX_EA_BITS_PER_CONTEXT))
+               goto slb_bad_addr;
 
        /*
         * consider this as bad access if we take a SLB miss
         * on an address above addr limit.
         */
-       if (ea >= mm->context.slb_addr_limit)
-               return -EFAULT;
+       if (ea >= current->mm->context.slb_addr_limit)
+               goto slb_bad_addr;
 
-       context = get_ea_context(&mm->context, ea);
+       context = get_ea_context(&current->mm->context, ea);
        if (!context)
-               return -EFAULT;
-
-       if (unlikely(ea >= H_PGTABLE_RANGE)) {
-               WARN_ON(1);
-               return -EFAULT;
-       }
-
-       ssize = user_segment_size(ea);
-
-       bpsize = get_slice_psize(mm, ea);
-       flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp;
-
-       return slb_insert_entry(ea, context, flags, ssize, false);
-}
-
-long do_slb_fault(struct pt_regs *regs, unsigned long ea)
-{
-       unsigned long id = REGION_ID(ea);
-
-       /* IRQs are not reconciled here, so can't check irqs_disabled */
-       VM_WARN_ON(mfmsr() & MSR_EE);
-
-       if (unlikely(!(regs->msr & MSR_RI)))
-               return -EINVAL;
-
-       /*
-        * SLB kernel faults must be very careful not to touch anything
-        * that is not bolted. E.g., PACA and global variables are okay,
-        * mm->context stuff is not.
-        *
-        * SLB user faults can access all of kernel memory, but must be
-        * careful not to touch things like IRQ state because it is not
-        * "reconciled" here. The difficulty is that we must use
-        * fast_exception_return to return from kernel SLB faults without
-        * looking at possible non-bolted memory. We could test user vs
-        * kernel faults in the interrupt handler asm and do a full fault,
-        * reconcile, ret_from_except for user faults which would make them
-        * first class kernel code. But for performance it's probably nicer
-        * if they go via fast_exception_return too.
-        */
-       if (id >= KERNEL_REGION_ID) {
-               return slb_allocate_kernel(ea, id);
-       } else {
-               struct mm_struct *mm = current->mm;
-               long err;
-
-               if (unlikely(!mm))
-                       return -EFAULT;
+               goto slb_bad_addr;
 
-               err = slb_allocate_user(mm, ea);
-               if (!err)
-                       preload_add(current_thread_info(), ea);
-
-               return err;
-       }
-}
+       handle_multi_context_slb_miss(context, ea);
+       exception_exit(prev_state);
+       return;
 
-void do_bad_slb_fault(struct pt_regs *regs, unsigned long ea, long err)
-{
-       if (err == -EFAULT) {
-               if (user_mode(regs))
-                       _exception(SIGSEGV, regs, SEGV_BNDERR, ea);
-               else
-                       bad_page_fault(regs, ea, SIGSEGV);
-       } else if (err == -EINVAL) {
-               unrecoverable_exception(regs);
-       } else {
-               BUG();
-       }
+slb_bad_addr:
+       if (user_mode(regs))
+               _exception(SIGSEGV, regs, SEGV_BNDERR, ea);
+       else
+               bad_page_fault(regs, ea, SIGSEGV);
+       exception_exit(prev_state);
 }
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
new file mode 100644 (file)
index 0000000..4d2e921
--- /dev/null
@@ -0,0 +1,335 @@
+/*
+ * Low-level SLB routines
+ *
+ * Copyright (C) 2004 David Gibson <dwg@au.ibm.com>, IBM
+ *
+ * Based on earlier C version:
+ * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com
+ *    Copyright (c) 2001 Dave Engebretsen
+ * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/cputable.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/pgtable.h>
+#include <asm/firmware.h>
+#include <asm/feature-fixups.h>
+
+/*
+ * This macro generates asm code to compute the VSID scramble
+ * function.  Used in slb_allocate() and do_stab_bolted.  The function
+ * computed is: (protovsid*VSID_MULTIPLIER) % VSID_MODULUS
+ *
+ *     rt = register containing the proto-VSID and into which the
+ *             VSID will be stored
+ *     rx = scratch register (clobbered)
+ *     rf = flags
+ *
+ *     - rt and rx must be different registers
+ *     - The answer will end up in the low VSID_BITS bits of rt.  The higher
+ *       bits may contain other garbage, so you may need to mask the
+ *       result.
+ */
+#define ASM_VSID_SCRAMBLE(rt, rx, rf, size)                            \
+       lis     rx,VSID_MULTIPLIER_##size@h;                            \
+       ori     rx,rx,VSID_MULTIPLIER_##size@l;                         \
+       mulld   rt,rt,rx;               /* rt = rt * MULTIPLIER */      \
+/*                                                                     \
+ * powermac get slb fault before feature fixup, so make 65 bit part     \
+ * the default part of feature fixup                                   \
+ */                                                                    \
+BEGIN_MMU_FTR_SECTION                                                  \
+       srdi    rx,rt,VSID_BITS_65_##size;                              \
+       clrldi  rt,rt,(64-VSID_BITS_65_##size);                         \
+       add     rt,rt,rx;                                               \
+       addi    rx,rt,1;                                                \
+       srdi    rx,rx,VSID_BITS_65_##size;                              \
+       add     rt,rt,rx;                                               \
+       rldimi  rf,rt,SLB_VSID_SHIFT_##size,(64 - (SLB_VSID_SHIFT_##size + VSID_BITS_65_##size)); \
+MMU_FTR_SECTION_ELSE                                                   \
+       srdi    rx,rt,VSID_BITS_##size;                                 \
+       clrldi  rt,rt,(64-VSID_BITS_##size);                            \
+       add     rt,rt,rx;               /* add high and low bits */     \
+       addi    rx,rt,1;                                                \
+       srdi    rx,rx,VSID_BITS_##size; /* extract 2^VSID_BITS bit */   \
+       add     rt,rt,rx;                                               \
+       rldimi  rf,rt,SLB_VSID_SHIFT_##size,(64 - (SLB_VSID_SHIFT_##size + VSID_BITS_##size)); \
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_68_BIT_VA)
+
+
+/* void slb_allocate(unsigned long ea);
+ *
+ * Create an SLB entry for the given EA (user or kernel).
+ *     r3 = faulting address, r13 = PACA
+ *     r9, r10, r11 are clobbered by this function
+ *     r3 is preserved.
+ * No other registers are examined or changed.
+ */
+_GLOBAL(slb_allocate)
+       /*
+        * Check if the address falls within the range of the first context, or
+        * if we may need to handle multi context. For the first context we
+        * allocate the slb entry via the fast path below. For large address we
+        * branch out to C-code and see if additional contexts have been
+        * allocated.
+        * The test here is:
+        *   (ea & ~REGION_MASK) >= (1ull << MAX_EA_BITS_PER_CONTEXT)
+        */
+       rldicr. r9,r3,4,(63 - MAX_EA_BITS_PER_CONTEXT - 4)
+       bne-    8f
+
+       srdi    r9,r3,60                /* get region */
+       srdi    r10,r3,SID_SHIFT        /* get esid */
+       cmpldi  cr7,r9,0xc              /* cmp PAGE_OFFSET for later use */
+
+       /* r3 = address, r10 = esid, cr7 = <> PAGE_OFFSET */
+       blt     cr7,0f                  /* user or kernel? */
+
+       /* Check if hitting the linear mapping or some other kernel space
+       */
+       bne     cr7,1f
+
+       /* Linear mapping encoding bits, the "li" instruction below will
+        * be patched by the kernel at boot
+        */
+.globl slb_miss_kernel_load_linear
+slb_miss_kernel_load_linear:
+       li      r11,0
+       /*
+        * context = (ea >> 60) - (0xc - 1)
+        * r9 = region id.
+        */
+       subi    r9,r9,KERNEL_REGION_CONTEXT_OFFSET
+
+BEGIN_FTR_SECTION
+       b       .Lslb_finish_load
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
+       b       .Lslb_finish_load_1T
+
+1:
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+       cmpldi  cr0,r9,0xf
+       bne     1f
+/* Check virtual memmap region. To be patched at kernel boot */
+.globl slb_miss_kernel_load_vmemmap
+slb_miss_kernel_load_vmemmap:
+       li      r11,0
+       b       6f
+1:
+#endif /* CONFIG_SPARSEMEM_VMEMMAP */
+
+       /*
+        * r10 contains the ESID, which is the original faulting EA shifted
+        * right by 28 bits. We need to compare that with (H_VMALLOC_END >> 28)
+        * which is 0xd00038000. That can't be used as an immediate, even if we
+        * ignored the 0xd, so we have to load it into a register, and we only
+        * have one register free. So we must load all of (H_VMALLOC_END >> 28)
+        * into a register and compare ESID against that.
+        */
+       lis     r11,(H_VMALLOC_END >> 32)@h     // r11 = 0xffffffffd0000000
+       ori     r11,r11,(H_VMALLOC_END >> 32)@l // r11 = 0xffffffffd0003800
+       // Rotate left 4, then mask with 0xffffffff0
+       rldic   r11,r11,4,28                    // r11 = 0xd00038000
+       cmpld   r10,r11                         // if r10 >= r11
+       bge     5f                              //   goto io_mapping
+
+       /*
+        * vmalloc mapping gets the encoding from the PACA as the mapping
+        * can be demoted from 64K -> 4K dynamically on some machines.
+        */
+       lhz     r11,PACAVMALLOCSLLP(r13)
+       b       6f
+5:
+       /* IO mapping */
+.globl slb_miss_kernel_load_io
+slb_miss_kernel_load_io:
+       li      r11,0
+6:
+       /*
+        * context = (ea >> 60) - (0xc - 1)
+        * r9 = region id.
+        */
+       subi    r9,r9,KERNEL_REGION_CONTEXT_OFFSET
+
+BEGIN_FTR_SECTION
+       b       .Lslb_finish_load
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
+       b       .Lslb_finish_load_1T
+
+0:     /*
+        * For userspace addresses, make sure this is region 0.
+        */
+       cmpdi   r9, 0
+       bne-    8f
+       /*
+        * For user space, make sure we are within the allowed limit.
+        */
+       ld      r11,PACA_SLB_ADDR_LIMIT(r13)
+       cmpld   r3,r11
+       bge-    8f
+
+       /* when using slices, we extract the psize off the slice bitmaps
+        * and then we need to get the sllp encoding off the mmu_psize_defs
+        * array.
+        *
+        * XXX This is a bit inefficient especially for the normal case,
+        * so we should try to implement a fast path for the standard page
+        * size using the old sllp value so we avoid the array. We cannot
+        * really do dynamic patching unfortunately as processes might flip
+        * between 4k and 64k standard page size
+        */
+#ifdef CONFIG_PPC_MM_SLICES
+       /* r10 has the esid */
+       cmpldi  r10,16
+       /* below SLICE_LOW_TOP */
+       blt     5f
+       /*
+        * Handle hpsizes,
+        * r9 is get_paca()->context.high_slices_psize[index], r11 is mask_index
+        */
+       srdi    r11,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT + 1) /* index */
+       addi    r9,r11,PACAHIGHSLICEPSIZE
+       lbzx    r9,r13,r9               /* r9 is hpsizes[r11] */
+       /* r11 = (r10 >> (SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT)) & 0x1 */
+       rldicl  r11,r10,(64 - (SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT)),63
+       b       6f
+
+5:
+       /*
+        * Handle lpsizes
+        * r9 is get_paca()->context.low_slices_psize[index], r11 is mask_index
+        */
+       srdi    r11,r10,1 /* index */
+       addi    r9,r11,PACALOWSLICESPSIZE
+       lbzx    r9,r13,r9               /* r9 is lpsizes[r11] */
+       rldicl  r11,r10,0,63            /* r11 = r10 & 0x1 */
+6:
+       sldi    r11,r11,2  /* index * 4 */
+       /* Extract the psize and multiply to get an array offset */
+       srd     r9,r9,r11
+       andi.   r9,r9,0xf
+       mulli   r9,r9,MMUPSIZEDEFSIZE
+
+       /* Now get to the array and obtain the sllp
+        */
+       ld      r11,PACATOC(r13)
+       ld      r11,mmu_psize_defs@got(r11)
+       add     r11,r11,r9
+       ld      r11,MMUPSIZESLLP(r11)
+       ori     r11,r11,SLB_VSID_USER
+#else
+       /* paca context sllp already contains the SLB_VSID_USER bits */
+       lhz     r11,PACACONTEXTSLLP(r13)
+#endif /* CONFIG_PPC_MM_SLICES */
+
+       ld      r9,PACACONTEXTID(r13)
+BEGIN_FTR_SECTION
+       cmpldi  r10,0x1000
+       bge     .Lslb_finish_load_1T
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
+       b       .Lslb_finish_load
+
+8:     /* invalid EA - return an error indication */
+       crset   4*cr0+eq                /* indicate failure */
+       blr
+
+/*
+ * Finish loading of an SLB entry and return
+ *
+ * r3 = EA, r9 = context, r10 = ESID, r11 = flags, clobbers r9, cr7 = <> PAGE_OFFSET
+ */
+.Lslb_finish_load:
+       rldimi  r10,r9,ESID_BITS,0
+       ASM_VSID_SCRAMBLE(r10,r9,r11,256M)
+       /* r3 = EA, r11 = VSID data */
+       /*
+        * Find a slot, round robin. Previously we tried to find a
+        * free slot first but that took too long. Unfortunately we
+        * don't have any LRU information to help us choose a slot.
+        */
+
+       mr      r9,r3
+
+       /* slb_finish_load_1T continues here. r9=EA with non-ESID bits clear */
+7:     ld      r10,PACASTABRR(r13)
+       addi    r10,r10,1
+       /* This gets soft patched on boot. */
+.globl slb_compare_rr_to_size
+slb_compare_rr_to_size:
+       cmpldi  r10,0
+
+       blt+    4f
+       li      r10,SLB_NUM_BOLTED
+
+4:
+       std     r10,PACASTABRR(r13)
+
+3:
+       rldimi  r9,r10,0,36             /* r9  = EA[0:35] | entry */
+       oris    r10,r9,SLB_ESID_V@h     /* r10 = r9 | SLB_ESID_V */
+
+       /* r10 = ESID data, r11 = VSID data */
+
+       /*
+        * No need for an isync before or after this slbmte. The exception
+        * we enter with and the rfid we exit with are context synchronizing.
+        */
+       slbmte  r11,r10
+
+       /* we're done for kernel addresses */
+       crclr   4*cr0+eq                /* set result to "success" */
+       bgelr   cr7
+
+       /* Update the slb cache */
+       lhz     r9,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */
+       cmpldi  r9,SLB_CACHE_ENTRIES
+       bge     1f
+
+       /* still room in the slb cache */
+       sldi    r11,r9,2                /* r11 = offset * sizeof(u32) */
+       srdi    r10,r10,28              /* get the 36 bits of the ESID */
+       add     r11,r11,r13             /* r11 = (u32 *)paca + offset */
+       stw     r10,PACASLBCACHE(r11)   /* paca->slb_cache[offset] = esid */
+       addi    r9,r9,1                 /* offset++ */
+       b       2f
+1:                                     /* offset >= SLB_CACHE_ENTRIES */
+       li      r9,SLB_CACHE_ENTRIES+1
+2:
+       sth     r9,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */
+       crclr   4*cr0+eq                /* set result to "success" */
+       blr
+
+/*
+ * Finish loading of a 1T SLB entry (kernel or user segment) and return.
+ *
+ * r3 = EA, r9 = context, r10 = ESID(256MB), r11 = flags, clobbers r9
+ */
+.Lslb_finish_load_1T:
+       srdi    r10,r10,(SID_SHIFT_1T - SID_SHIFT)      /* get 1T ESID */
+       rldimi  r10,r9,ESID_BITS_1T,0
+       ASM_VSID_SCRAMBLE(r10,r9,r11,1T)
+
+       li      r10,MMU_SEGSIZE_1T
+       rldimi  r11,r10,SLB_VSID_SSIZE_SHIFT,0  /* insert segment size */
+
+       /* r3 = EA, r11 = VSID data */
+       clrrdi  r9,r3,SID_SHIFT_1T      /* clear out non-ESID bits */
+       b       7b
+
+
+_ASM_NOKPROBE_SYMBOL(slb_allocate)
+_ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_linear)
+_ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_io)
+_ASM_NOKPROBE_SYMBOL(slb_compare_rr_to_size)
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+_ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_vmemmap)
+#endif
index fc5b3a1ec6664c4c9973e252459cf01e8c0f83f4..205fe557ca109dda9a16e3fd9a8fe0f75909bc25 100644 (file)
@@ -207,6 +207,23 @@ static bool slice_check_range_fits(struct mm_struct *mm,
        return true;
 }
 
+static void slice_flush_segments(void *parm)
+{
+#ifdef CONFIG_PPC64
+       struct mm_struct *mm = parm;
+       unsigned long flags;
+
+       if (mm != current->active_mm)
+               return;
+
+       copy_mm_to_paca(current->active_mm);
+
+       local_irq_save(flags);
+       slb_flush_and_rebolt();
+       local_irq_restore(flags);
+#endif
+}
+
 static void slice_convert(struct mm_struct *mm,
                                const struct slice_mask *mask, int psize)
 {
@@ -272,9 +289,6 @@ static void slice_convert(struct mm_struct *mm,
        spin_unlock_irqrestore(&slice_convert_lock, flags);
 
        copro_flush_all_slbs(mm);
-#ifdef CONFIG_PPC64
-       core_flush_all_slbs(mm);
-#endif
 }
 
 /*
@@ -488,9 +502,8 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
                 * be already initialised beyond the old address limit.
                 */
                mm->context.slb_addr_limit = high_limit;
-#ifdef CONFIG_PPC64
-               core_flush_all_slbs(mm);
-#endif
+
+               on_each_cpu(slice_flush_segments, mm, 1);
        }
 
        /* Sanity checks */
@@ -652,10 +665,8 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
                (SLICE_NUM_HIGH &&
                 !bitmap_empty(potential_mask.high_slices, SLICE_NUM_HIGH))) {
                slice_convert(mm, &potential_mask, psize);
-#ifdef CONFIG_PPC64
                if (psize > MMU_PAGE_BASE)
-                       core_flush_all_slbs(mm);
-#endif
+                       on_each_cpu(slice_flush_segments, mm, 1);
        }
        return newaddr;
 
@@ -746,20 +757,6 @@ void slice_init_new_context_exec(struct mm_struct *mm)
                bitmap_fill(mask->high_slices, SLICE_NUM_HIGH);
 }
 
-#ifdef CONFIG_PPC_BOOK3S_64
-void slice_setup_new_exec(void)
-{
-       struct mm_struct *mm = current->mm;
-
-       slice_dbg("slice_setup_new_exec(mm=%p)\n", mm);
-
-       if (!is_32bit_task())
-               return;
-
-       mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW;
-}
-#endif
-
 void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
                           unsigned long len, unsigned int psize)
 {
index 694c1d92e7969b4fc67df0a28623a75381ca2b2b..c70d17c9a6ba921f09450a0e404d46fed4389e9c 100644 (file)
@@ -2394,9 +2394,7 @@ static void dump_one_paca(int cpu)
                        }
                }
                DUMP(p, vmalloc_sllp, "%#-*x");
-               DUMP(p, stab_rr, "%#-*x");
-               DUMP(p, slb_used_bitmap, "%#-*x");
-               DUMP(p, slb_kern_bitmap, "%#-*x");
+               DUMP(p, stab_rr, "%#-*llx");
 
                if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) {
                        DUMP(p, slb_cache_ptr, "%#-*x");