From 54be0b9c7c9888ebe63b89a31a17ee3df6a68d61 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 2 Oct 2018 23:56:39 +1000 Subject: [PATCH] Revert "convert SLB miss handlers to C" and subsequent commits This reverts commits: 5e46e29e6a97 ("powerpc/64s/hash: convert SLB miss handlers to C") 8fed04d0f6ae ("powerpc/64s/hash: remove user SLB data from the paca") 655deecf67b2 ("powerpc/64s/hash: SLB allocation status bitmaps") 2e1626744e8d ("powerpc/64s/hash: provide arch_setup_exec hooks for hash slice setup") 89ca4e126a3f ("powerpc/64s/hash: Add a SLB preload cache") This series had a few bugs, and the fixes are not all trivial. So revert most of it for now. Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/asm-prototypes.h | 2 - arch/powerpc/include/asm/book3s/64/mmu-hash.h | 3 - arch/powerpc/include/asm/exception-64s.h | 8 + arch/powerpc/include/asm/paca.h | 19 +- arch/powerpc/include/asm/processor.h | 1 - arch/powerpc/include/asm/slice.h | 1 - arch/powerpc/include/asm/thread_info.h | 11 - arch/powerpc/kernel/asm-offsets.c | 11 +- arch/powerpc/kernel/exceptions-64s.S | 202 ++++++-- arch/powerpc/kernel/paca.c | 22 + arch/powerpc/kernel/process.c | 16 - arch/powerpc/mm/Makefile | 2 +- arch/powerpc/mm/hash_utils_64.c | 46 +- arch/powerpc/mm/mmu_context.c | 3 +- arch/powerpc/mm/mmu_context_book3s64.c | 9 - arch/powerpc/mm/slb.c | 485 ++++++------------ arch/powerpc/mm/slb_low.S | 335 ++++++++++++ arch/powerpc/mm/slice.c | 43 +- arch/powerpc/xmon/xmon.c | 4 +- 19 files changed, 766 insertions(+), 457 deletions(-) create mode 100644 arch/powerpc/mm/slb_low.S diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 78ed3c3f879a..1f4691ce4126 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -78,8 +78,6 @@ void kernel_bad_stack(struct pt_regs *regs); void system_reset_exception(struct pt_regs *regs); void machine_check_exception(struct pt_regs *regs); void emulation_assist_interrupt(struct pt_regs *regs); -long do_slb_fault(struct pt_regs *regs, unsigned long ea); -void do_bad_slb_fault(struct pt_regs *regs, unsigned long ea, long err); /* signals, syscalls and interrupts */ long sys_swapcontext(struct ucontext __user *old_ctx, diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index bbeaf6adf93c..e0e4ce8f77d6 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -487,8 +487,6 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend, extern void pseries_add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages); extern void demote_segment_4k(struct mm_struct *mm, unsigned long addr); -extern void hash__setup_new_exec(void); - #ifdef CONFIG_PPC_PSERIES void hpte_init_pseries(void); #else @@ -503,7 +501,6 @@ struct slb_entry { }; extern void slb_initialize(void); -extern void core_flush_all_slbs(struct mm_struct *mm); extern void slb_flush_and_rebolt(void); void slb_flush_all_realmode(void); void __slb_restore_bolted_realmode(void); diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 47578b79f0fb..a86feddddad0 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -60,6 +60,14 @@ */ #define MAX_MCE_DEPTH 4 +/* + * EX_LR is only used in EXSLB and where it does not overlap with EX_DAR + * EX_CCR similarly with DSISR, but being 4 byte registers there is a hole + * in the save area so it's not necessary to overlap them. Could be used + * for future savings though if another 4 byte register was to be saved. + */ +#define EX_LR EX_DAR + /* * EX_R3 is only used by the bad_stack handler. bad_stack reloads and * saves DAR from SPRN_DAR, and EX_DAR is not used. So EX_R3 can overlap diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 6d6b3706232c..7b6e23af3808 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -113,10 +113,7 @@ struct paca_struct { * on the linear mapping */ /* SLB related definitions */ u16 vmalloc_sllp; - u8 slb_cache_ptr; - u8 stab_rr; /* stab/slb round-robin counter */ - u32 slb_used_bitmap; /* Bitmaps for first 32 SLB entries. */ - u32 slb_kern_bitmap; + u16 slb_cache_ptr; u32 slb_cache[SLB_CACHE_ENTRIES]; #endif /* CONFIG_PPC_BOOK3S_64 */ @@ -146,11 +143,24 @@ struct paca_struct { struct tlb_core_data tcd; #endif /* CONFIG_PPC_BOOK3E */ +#ifdef CONFIG_PPC_BOOK3S + mm_context_id_t mm_ctx_id; +#ifdef CONFIG_PPC_MM_SLICES + unsigned char mm_ctx_low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE]; + unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE]; + unsigned long mm_ctx_slb_addr_limit; +#else + u16 mm_ctx_user_psize; + u16 mm_ctx_sllp; +#endif +#endif + /* * then miscellaneous read-write fields */ struct task_struct *__current; /* Pointer to current */ u64 kstack; /* Saved Kernel stack addr */ + u64 stab_rr; /* stab/slb round-robin counter */ u64 saved_r1; /* r1 save for RTAS calls or PM or EE=0 */ u64 saved_msr; /* MSR saved here by enter_rtas */ u16 trap_save; /* Used when bad stack is encountered */ @@ -248,6 +258,7 @@ struct paca_struct { #endif /* CONFIG_PPC_BOOK3S_64 */ } ____cacheline_aligned; +extern void copy_mm_to_paca(struct mm_struct *mm); extern struct paca_struct **paca_ptrs; extern void initialise_paca(struct paca_struct *new_paca, int cpu); extern void setup_paca(struct paca_struct *new_paca); diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 350c584ca179..52fadded5c1e 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -273,7 +273,6 @@ struct thread_struct { #endif /* CONFIG_HAVE_HW_BREAKPOINT */ struct arch_hw_breakpoint hw_brk; /* info on the hardware breakpoint */ unsigned long trap_nr; /* last trap # on this thread */ - u8 load_slb; /* Ages out SLB preload cache entries */ u8 load_fp; #ifdef CONFIG_ALTIVEC u8 load_vec; diff --git a/arch/powerpc/include/asm/slice.h b/arch/powerpc/include/asm/slice.h index a595461c9cb0..e40406cf5628 100644 --- a/arch/powerpc/include/asm/slice.h +++ b/arch/powerpc/include/asm/slice.h @@ -32,7 +32,6 @@ void slice_set_range_psize(struct mm_struct *mm, unsigned long start, unsigned long len, unsigned int psize); void slice_init_new_context_exec(struct mm_struct *mm); -void slice_setup_new_exec(void); #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 406eb952b808..3c0002044bc9 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -29,7 +29,6 @@ #include #include -#define SLB_PRELOAD_NR 16U /* * low level task data. */ @@ -45,10 +44,6 @@ struct thread_info { #if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC32) struct cpu_accounting_data accounting; #endif - u8 slb_preload_nr; - u8 slb_preload_tail; - u32 slb_preload_esid[SLB_PRELOAD_NR]; - /* low level flags - has atomic operations done on it */ unsigned long flags ____cacheline_aligned_in_smp; }; @@ -77,12 +72,6 @@ static inline struct thread_info *current_thread_info(void) } extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); - -#ifdef CONFIG_PPC_BOOK3S_64 -void arch_setup_new_exec(void); -#define arch_setup_new_exec arch_setup_new_exec -#endif - #endif /* __ASSEMBLY__ */ /* diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index ba9d0fc98730..89cf15566c4e 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -173,6 +173,7 @@ int main(void) OFFSET(PACAKSAVE, paca_struct, kstack); OFFSET(PACACURRENT, paca_struct, __current); OFFSET(PACASAVEDMSR, paca_struct, saved_msr); + OFFSET(PACASTABRR, paca_struct, stab_rr); OFFSET(PACAR1, paca_struct, saved_r1); OFFSET(PACATOC, paca_struct, kernel_toc); OFFSET(PACAKBASE, paca_struct, kernelbase); @@ -180,6 +181,15 @@ int main(void) OFFSET(PACAIRQSOFTMASK, paca_struct, irq_soft_mask); OFFSET(PACAIRQHAPPENED, paca_struct, irq_happened); OFFSET(PACA_FTRACE_ENABLED, paca_struct, ftrace_enabled); +#ifdef CONFIG_PPC_BOOK3S + OFFSET(PACACONTEXTID, paca_struct, mm_ctx_id); +#ifdef CONFIG_PPC_MM_SLICES + OFFSET(PACALOWSLICESPSIZE, paca_struct, mm_ctx_low_slices_psize); + OFFSET(PACAHIGHSLICEPSIZE, paca_struct, mm_ctx_high_slices_psize); + OFFSET(PACA_SLB_ADDR_LIMIT, paca_struct, mm_ctx_slb_addr_limit); + DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def)); +#endif /* CONFIG_PPC_MM_SLICES */ +#endif #ifdef CONFIG_PPC_BOOK3E OFFSET(PACAPGD, paca_struct, pgd); @@ -202,7 +212,6 @@ int main(void) #ifdef CONFIG_PPC_BOOK3S_64 OFFSET(PACASLBCACHE, paca_struct, slb_cache); OFFSET(PACASLBCACHEPTR, paca_struct, slb_cache_ptr); - OFFSET(PACASTABRR, paca_struct, stab_rr); OFFSET(PACAVMALLOCSLLP, paca_struct, vmalloc_sllp); #ifdef CONFIG_PPC_MM_SLICES OFFSET(MMUPSIZESLLP, mmu_psize_def, sllp); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 786f4fa5100a..301a6a86a20f 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -596,36 +596,28 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80) -EXCEPTION_PROLOG(PACA_EXSLB, data_access_slb_common, EXC_STD, KVMTEST_PR, 0x380); + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXSLB) + EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x380) + mr r12,r3 /* save r3 */ + mfspr r3,SPRN_DAR + mfspr r11,SPRN_SRR1 + crset 4*cr6+eq + BRANCH_TO_COMMON(r10, slb_miss_common) EXC_REAL_END(data_access_slb, 0x380, 0x80) EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80) -EXCEPTION_RELON_PROLOG(PACA_EXSLB, data_access_slb_common, EXC_STD, NOTEST, 0x380); + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXSLB) + EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x380) + mr r12,r3 /* save r3 */ + mfspr r3,SPRN_DAR + mfspr r11,SPRN_SRR1 + crset 4*cr6+eq + BRANCH_TO_COMMON(r10, slb_miss_common) EXC_VIRT_END(data_access_slb, 0x4380, 0x80) - TRAMP_KVM_SKIP(PACA_EXSLB, 0x380) -EXC_COMMON_BEGIN(data_access_slb_common) - mfspr r10,SPRN_DAR - std r10,PACA_EXSLB+EX_DAR(r13) - EXCEPTION_PROLOG_COMMON(0x380, PACA_EXSLB) - ld r4,PACA_EXSLB+EX_DAR(r13) - std r4,_DAR(r1) - addi r3,r1,STACK_FRAME_OVERHEAD - bl do_slb_fault - cmpdi r3,0 - bne- 1f - b fast_exception_return -1: /* Error case */ - std r3,RESULT(r1) - bl save_nvgprs - RECONCILE_IRQ_STATE(r10, r11) - ld r4,_DAR(r1) - ld r5,RESULT(r1) - addi r3,r1,STACK_FRAME_OVERHEAD - bl do_bad_slb_fault - b ret_from_except - EXC_REAL(instruction_access, 0x400, 0x80) EXC_VIRT(instruction_access, 0x4400, 0x80, 0x400) @@ -648,34 +640,160 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80) -EXCEPTION_PROLOG(PACA_EXSLB, instruction_access_slb_common, EXC_STD, KVMTEST_PR, 0x480); + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXSLB) + EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480) + mr r12,r3 /* save r3 */ + mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ + mfspr r11,SPRN_SRR1 + crclr 4*cr6+eq + BRANCH_TO_COMMON(r10, slb_miss_common) EXC_REAL_END(instruction_access_slb, 0x480, 0x80) EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80) -EXCEPTION_RELON_PROLOG(PACA_EXSLB, instruction_access_slb_common, EXC_STD, NOTEST, 0x480); + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXSLB) + EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x480) + mr r12,r3 /* save r3 */ + mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ + mfspr r11,SPRN_SRR1 + crclr 4*cr6+eq + BRANCH_TO_COMMON(r10, slb_miss_common) EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80) - TRAMP_KVM(PACA_EXSLB, 0x480) -EXC_COMMON_BEGIN(instruction_access_slb_common) - EXCEPTION_PROLOG_COMMON(0x480, PACA_EXSLB) - ld r4,_NIP(r1) - addi r3,r1,STACK_FRAME_OVERHEAD - bl do_slb_fault - cmpdi r3,0 - bne- 1f - b fast_exception_return -1: /* Error case */ - std r3,RESULT(r1) + +/* + * This handler is used by the 0x380 and 0x480 SLB miss interrupts, as well as + * the virtual mode 0x4380 and 0x4480 interrupts if AIL is enabled. + */ +EXC_COMMON_BEGIN(slb_miss_common) + /* + * r13 points to the PACA, r9 contains the saved CR, + * r12 contains the saved r3, + * r11 contain the saved SRR1, SRR0 is still ready for return + * r3 has the faulting address + * r9 - r13 are saved in paca->exslb. + * cr6.eq is set for a D-SLB miss, clear for a I-SLB miss + * We assume we aren't going to take any exceptions during this + * procedure. + */ + mflr r10 + stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ + std r10,PACA_EXSLB+EX_LR(r13) /* save LR */ + + andi. r9,r11,MSR_PR // Check for exception from userspace + cmpdi cr4,r9,MSR_PR // And save the result in CR4 for later + + /* + * Test MSR_RI before calling slb_allocate_realmode, because the + * MSR in r11 gets clobbered. However we still want to allocate + * SLB in case MSR_RI=0, to minimise the risk of getting stuck in + * recursive SLB faults. So use cr5 for this, which is preserved. + */ + andi. r11,r11,MSR_RI /* check for unrecoverable exception */ + cmpdi cr5,r11,MSR_RI + + crset 4*cr0+eq +#ifdef CONFIG_PPC_BOOK3S_64 +BEGIN_MMU_FTR_SECTION + bl slb_allocate +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) +#endif + + ld r10,PACA_EXSLB+EX_LR(r13) + lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ + mtlr r10 + + /* + * Large address, check whether we have to allocate new contexts. + */ + beq- 8f + + bne- cr5,2f /* if unrecoverable exception, oops */ + + /* All done -- return from exception. */ + + bne cr4,1f /* returning to kernel */ + + mtcrf 0x80,r9 + mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */ + mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */ + mtcrf 0x02,r9 /* I/D indication is in cr6 */ + mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ + + RESTORE_CTR(r9, PACA_EXSLB) + RESTORE_PPR_PACA(PACA_EXSLB, r9) + mr r3,r12 + ld r9,PACA_EXSLB+EX_R9(r13) + ld r10,PACA_EXSLB+EX_R10(r13) + ld r11,PACA_EXSLB+EX_R11(r13) + ld r12,PACA_EXSLB+EX_R12(r13) + ld r13,PACA_EXSLB+EX_R13(r13) + RFI_TO_USER + b . /* prevent speculative execution */ +1: + mtcrf 0x80,r9 + mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */ + mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */ + mtcrf 0x02,r9 /* I/D indication is in cr6 */ + mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ + + RESTORE_CTR(r9, PACA_EXSLB) + RESTORE_PPR_PACA(PACA_EXSLB, r9) + mr r3,r12 + ld r9,PACA_EXSLB+EX_R9(r13) + ld r10,PACA_EXSLB+EX_R10(r13) + ld r11,PACA_EXSLB+EX_R11(r13) + ld r12,PACA_EXSLB+EX_R12(r13) + ld r13,PACA_EXSLB+EX_R13(r13) + RFI_TO_KERNEL + b . /* prevent speculative execution */ + + +2: std r3,PACA_EXSLB+EX_DAR(r13) + mr r3,r12 + mfspr r11,SPRN_SRR0 + mfspr r12,SPRN_SRR1 + LOAD_HANDLER(r10,unrecov_slb) + mtspr SPRN_SRR0,r10 + ld r10,PACAKMSR(r13) + mtspr SPRN_SRR1,r10 + RFI_TO_KERNEL + b . + +8: std r3,PACA_EXSLB+EX_DAR(r13) + mr r3,r12 + mfspr r11,SPRN_SRR0 + mfspr r12,SPRN_SRR1 + LOAD_HANDLER(r10, large_addr_slb) + mtspr SPRN_SRR0,r10 + ld r10,PACAKMSR(r13) + mtspr SPRN_SRR1,r10 + RFI_TO_KERNEL + b . + +EXC_COMMON_BEGIN(unrecov_slb) + EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB) + RECONCILE_IRQ_STATE(r10, r11) bl save_nvgprs +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl unrecoverable_exception + b 1b + +EXC_COMMON_BEGIN(large_addr_slb) + EXCEPTION_PROLOG_COMMON(0x380, PACA_EXSLB) RECONCILE_IRQ_STATE(r10, r11) - ld r4,_NIP(r1) - ld r5,RESULT(r1) - addi r3,r1,STACK_FRAME_OVERHEAD - bl do_bad_slb_fault + ld r3, PACA_EXSLB+EX_DAR(r13) + std r3, _DAR(r1) + beq cr6, 2f + li r10, 0x481 /* fix trap number for I-SLB miss */ + std r10, _TRAP(r1) +2: bl save_nvgprs + addi r3, r1, STACK_FRAME_OVERHEAD + bl slb_miss_large_addr b ret_from_except - EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100) .globl hardware_interrupt_hv; hardware_interrupt_hv: diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 0cf84e30d1cd..0ee3e6d50f28 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -258,3 +258,25 @@ void __init free_unused_pacas(void) printk(KERN_DEBUG "Allocated %u bytes for %u pacas\n", paca_ptrs_size + paca_struct_size, nr_cpu_ids); } + +void copy_mm_to_paca(struct mm_struct *mm) +{ +#ifdef CONFIG_PPC_BOOK3S + mm_context_t *context = &mm->context; + + get_paca()->mm_ctx_id = context->id; +#ifdef CONFIG_PPC_MM_SLICES + VM_BUG_ON(!mm->context.slb_addr_limit); + get_paca()->mm_ctx_slb_addr_limit = mm->context.slb_addr_limit; + memcpy(&get_paca()->mm_ctx_low_slices_psize, + &context->low_slices_psize, sizeof(context->low_slices_psize)); + memcpy(&get_paca()->mm_ctx_high_slices_psize, + &context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm)); +#else /* CONFIG_PPC_MM_SLICES */ + get_paca()->mm_ctx_user_psize = context->user_psize; + get_paca()->mm_ctx_sllp = context->sllp; +#endif +#else /* !CONFIG_PPC_BOOK3S */ + return; +#endif +} diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 03c2e1f134bc..913c5725cdb2 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1482,15 +1482,6 @@ void flush_thread(void) #endif /* CONFIG_HAVE_HW_BREAKPOINT */ } -#ifdef CONFIG_PPC_BOOK3S_64 -void arch_setup_new_exec(void) -{ - if (radix_enabled()) - return; - hash__setup_new_exec(); -} -#endif - int set_thread_uses_vas(void) { #ifdef CONFIG_PPC_BOOK3S_64 @@ -1719,8 +1710,6 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, return 0; } -void preload_new_slb_context(unsigned long start, unsigned long sp); - /* * Set up a thread for executing a new program */ @@ -1728,10 +1717,6 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) { #ifdef CONFIG_PPC64 unsigned long load_addr = regs->gpr[2]; /* saved by ELF_PLAT_INIT */ - -#ifdef CONFIG_PPC_BOOK3S_64 - preload_new_slb_context(start, sp); -#endif #endif /* @@ -1822,7 +1807,6 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) #ifdef CONFIG_VSX current->thread.used_vsr = 0; #endif - current->thread.load_slb = 0; current->thread.load_fp = 0; memset(¤t->thread.fp_state, 0, sizeof(current->thread.fp_state)); current->thread.fp_save_area = NULL; diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 892d4e061d62..cdf6a9960046 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -15,7 +15,7 @@ obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(BITS)e.o hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o obj-$(CONFIG_PPC_BOOK3E_64) += pgtable-book3e.o -obj-$(CONFIG_PPC_BOOK3S_64) += pgtable-hash64.o hash_utils_64.o slb.o $(hash64-y) mmu_context_book3s64.o pgtable-book3s64.o +obj-$(CONFIG_PPC_BOOK3S_64) += pgtable-hash64.o hash_utils_64.o slb_low.o slb.o $(hash64-y) mmu_context_book3s64.o pgtable-book3s64.o obj-$(CONFIG_PPC_RADIX_MMU) += pgtable-radix.o tlb-radix.o obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o obj-$(CONFIG_PPC_STD_MMU) += tlb_hash$(BITS).o diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 88c95dc8b141..f23a89d8e4ce 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1088,16 +1088,16 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) } #ifdef CONFIG_PPC_MM_SLICES -static unsigned int get_psize(struct mm_struct *mm, unsigned long addr) +static unsigned int get_paca_psize(unsigned long addr) { unsigned char *psizes; unsigned long index, mask_index; if (addr < SLICE_LOW_TOP) { - psizes = mm->context.low_slices_psize; + psizes = get_paca()->mm_ctx_low_slices_psize; index = GET_LOW_SLICE_INDEX(addr); } else { - psizes = mm->context.high_slices_psize; + psizes = get_paca()->mm_ctx_high_slices_psize; index = GET_HIGH_SLICE_INDEX(addr); } mask_index = index & 0x1; @@ -1105,9 +1105,9 @@ static unsigned int get_psize(struct mm_struct *mm, unsigned long addr) } #else -unsigned int get_psize(struct mm_struct *mm, unsigned long addr) +unsigned int get_paca_psize(unsigned long addr) { - return mm->context.user_psize; + return get_paca()->mm_ctx_user_psize; } #endif @@ -1118,11 +1118,15 @@ unsigned int get_psize(struct mm_struct *mm, unsigned long addr) #ifdef CONFIG_PPC_64K_PAGES void demote_segment_4k(struct mm_struct *mm, unsigned long addr) { - if (get_psize(mm, addr) == MMU_PAGE_4K) + if (get_slice_psize(mm, addr) == MMU_PAGE_4K) return; slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K); copro_flush_all_slbs(mm); - core_flush_all_slbs(mm); + if ((get_paca_psize(addr) != MMU_PAGE_4K) && (current->mm == mm)) { + + copy_mm_to_paca(mm); + slb_flush_and_rebolt(); + } } #endif /* CONFIG_PPC_64K_PAGES */ @@ -1187,6 +1191,22 @@ void hash_failure_debug(unsigned long ea, unsigned long access, trap, vsid, ssize, psize, lpsize, pte); } +static void check_paca_psize(unsigned long ea, struct mm_struct *mm, + int psize, bool user_region) +{ + if (user_region) { + if (psize != get_paca_psize(ea)) { + copy_mm_to_paca(mm); + slb_flush_and_rebolt(); + } + } else if (get_paca()->vmalloc_sllp != + mmu_psize_defs[mmu_vmalloc_psize].sllp) { + get_paca()->vmalloc_sllp = + mmu_psize_defs[mmu_vmalloc_psize].sllp; + slb_vmalloc_update(); + } +} + /* Result code is: * 0 - handled * 1 - normal page fault @@ -1219,7 +1239,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, rc = 1; goto bail; } - psize = get_psize(mm, ea); + psize = get_slice_psize(mm, ea); ssize = user_segment_size(ea); vsid = get_user_vsid(&mm->context, ea, ssize); break; @@ -1307,6 +1327,9 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, WARN_ON(1); } #endif + if (current->mm == mm) + check_paca_psize(ea, mm, psize, user_region); + goto bail; } @@ -1341,14 +1364,15 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, "to 4kB pages because of " "non-cacheable mapping\n"); psize = mmu_vmalloc_psize = MMU_PAGE_4K; - slb_vmalloc_update(); copro_flush_all_slbs(mm); - core_flush_all_slbs(mm); } } #endif /* CONFIG_PPC_64K_PAGES */ + if (current->mm == mm) + check_paca_psize(ea, mm, psize, user_region); + #ifdef CONFIG_PPC_64K_PAGES if (psize == MMU_PAGE_64K) rc = __hash_page_64K(ea, access, vsid, ptep, trap, @@ -1436,7 +1460,7 @@ int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap, #ifdef CONFIG_PPC_MM_SLICES static bool should_hash_preload(struct mm_struct *mm, unsigned long ea) { - int psize = get_psize(mm, ea); + int psize = get_slice_psize(mm, ea); /* We only prefault standard pages for now */ if (unlikely(psize != mm->context.user_psize)) diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c index 28ae2835db3d..f84e14f23e50 100644 --- a/arch/powerpc/mm/mmu_context.c +++ b/arch/powerpc/mm/mmu_context.c @@ -54,7 +54,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * MMU context id, which is then moved to SPRN_PID. * * For the hash MMU it is either the first load from slb_cache - * in switch_slb(), and/or load of MMU context id. + * in switch_slb(), and/or the store of paca->mm_ctx_id in + * copy_mm_to_paca(). * * On the other side, the barrier is in mm/tlb-radix.c for * radix which orders earlier stores to clear the PTEs vs diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index 510f103d7813..dbd8f762140b 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -53,8 +53,6 @@ int hash__alloc_context_id(void) } EXPORT_SYMBOL_GPL(hash__alloc_context_id); -void slb_setup_new_exec(void); - static int hash__init_new_context(struct mm_struct *mm) { int index; @@ -86,13 +84,6 @@ static int hash__init_new_context(struct mm_struct *mm) return index; } -void hash__setup_new_exec(void) -{ - slice_setup_new_exec(); - - slb_setup_new_exec(); -} - static int radix__init_new_context(struct mm_struct *mm) { unsigned long rts_field; diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index b438220c4336..513c6596140d 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -14,7 +14,6 @@ * 2 of the License, or (at your option) any later version. */ -#include #include #include #include @@ -34,7 +33,7 @@ enum slb_index { KSTACK_INDEX = 1, /* Kernel stack map */ }; -static long slb_allocate_user(struct mm_struct *mm, unsigned long ea); +extern void slb_allocate(unsigned long ea); #define slb_esid_mask(ssize) \ (((ssize) == MMU_SEGSIZE_256M)? ESID_MASK: ESID_MASK_1T) @@ -45,17 +44,11 @@ static inline unsigned long mk_esid_data(unsigned long ea, int ssize, return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | index; } -static inline unsigned long __mk_vsid_data(unsigned long vsid, int ssize, - unsigned long flags) -{ - return (vsid << slb_vsid_shift(ssize)) | flags | - ((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT); -} - static inline unsigned long mk_vsid_data(unsigned long ea, int ssize, unsigned long flags) { - return __mk_vsid_data(get_kernel_vsid(ea, ssize), ssize, flags); + return (get_kernel_vsid(ea, ssize) << slb_vsid_shift(ssize)) | flags | + ((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT); } static inline void slb_shadow_update(unsigned long ea, int ssize, @@ -122,9 +115,6 @@ void slb_restore_bolted_realmode(void) { __slb_restore_bolted_realmode(); get_paca()->slb_cache_ptr = 0; - - get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1; - get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap; } /* @@ -132,6 +122,9 @@ void slb_restore_bolted_realmode(void) */ void slb_flush_all_realmode(void) { + /* + * This flushes all SLB entries including 0, so it must be realmode. + */ asm volatile("slbmte %0,%0; slbia" : : "r" (0)); } @@ -177,9 +170,6 @@ void slb_flush_and_rebolt(void) : "memory"); get_paca()->slb_cache_ptr = 0; - - get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1; - get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap; } void slb_save_contents(struct slb_entry *slb_ptr) @@ -212,7 +202,7 @@ void slb_dump_contents(struct slb_entry *slb_ptr) return; pr_err("SLB contents of cpu 0x%x\n", smp_processor_id()); - pr_err("Last SLB entry inserted at slot %u\n", get_paca()->stab_rr); + pr_err("Last SLB entry inserted at slot %lld\n", get_paca()->stab_rr); for (i = 0; i < mmu_slb_size; i++) { e = slb_ptr->esid; @@ -257,119 +247,41 @@ void slb_vmalloc_update(void) slb_flush_and_rebolt(); } -static bool preload_hit(struct thread_info *ti, unsigned long esid) -{ - u8 i; - - for (i = 0; i < ti->slb_preload_nr; i++) { - u8 idx; - - idx = (ti->slb_preload_tail + i) % SLB_PRELOAD_NR; - if (esid == ti->slb_preload_esid[idx]) - return true; - } - return false; -} - -static bool preload_add(struct thread_info *ti, unsigned long ea) -{ - unsigned long esid; - u8 idx; - - if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) { - /* EAs are stored >> 28 so 256MB segments don't need clearing */ - if (ea & ESID_MASK_1T) - ea &= ESID_MASK_1T; - } - - esid = ea >> SID_SHIFT; - - if (preload_hit(ti, esid)) - return false; - - idx = (ti->slb_preload_tail + ti->slb_preload_nr) % SLB_PRELOAD_NR; - ti->slb_preload_esid[idx] = esid; - if (ti->slb_preload_nr == SLB_PRELOAD_NR) - ti->slb_preload_tail = (ti->slb_preload_tail + 1) % SLB_PRELOAD_NR; - else - ti->slb_preload_nr++; - - return true; -} - -static void preload_age(struct thread_info *ti) -{ - if (!ti->slb_preload_nr) - return; - ti->slb_preload_nr--; - ti->slb_preload_tail = (ti->slb_preload_tail + 1) % SLB_PRELOAD_NR; -} - -void slb_setup_new_exec(void) +/* Helper function to compare esids. There are four cases to handle. + * 1. The system is not 1T segment size capable. Use the GET_ESID compare. + * 2. The system is 1T capable, both addresses are < 1T, use the GET_ESID compare. + * 3. The system is 1T capable, only one of the two addresses is > 1T. This is not a match. + * 4. The system is 1T capable, both addresses are > 1T, use the GET_ESID_1T macro to compare. + */ +static inline int esids_match(unsigned long addr1, unsigned long addr2) { - struct thread_info *ti = current_thread_info(); - struct mm_struct *mm = current->mm; - unsigned long exec = 0x10000000; + int esid_1t_count; - /* - * We have no good place to clear the slb preload cache on exec, - * flush_thread is about the earliest arch hook but that happens - * after we switch to the mm and have aleady preloaded the SLBEs. - * - * For the most part that's probably okay to use entries from the - * previous exec, they will age out if unused. It may turn out to - * be an advantage to clear the cache before switching to it, - * however. - */ - - /* - * preload some userspace segments into the SLB. - * Almost all 32 and 64bit PowerPC executables are linked at - * 0x10000000 so it makes sense to preload this segment. - */ - if (!is_kernel_addr(exec)) { - if (preload_add(ti, exec)) - slb_allocate_user(mm, exec); - } - - /* Libraries and mmaps. */ - if (!is_kernel_addr(mm->mmap_base)) { - if (preload_add(ti, mm->mmap_base)) - slb_allocate_user(mm, mm->mmap_base); - } -} + /* System is not 1T segment size capable. */ + if (!mmu_has_feature(MMU_FTR_1T_SEGMENT)) + return (GET_ESID(addr1) == GET_ESID(addr2)); -void preload_new_slb_context(unsigned long start, unsigned long sp) -{ - struct thread_info *ti = current_thread_info(); - struct mm_struct *mm = current->mm; - unsigned long heap = mm->start_brk; + esid_1t_count = (((addr1 >> SID_SHIFT_1T) != 0) + + ((addr2 >> SID_SHIFT_1T) != 0)); - /* Userspace entry address. */ - if (!is_kernel_addr(start)) { - if (preload_add(ti, start)) - slb_allocate_user(mm, start); - } + /* both addresses are < 1T */ + if (esid_1t_count == 0) + return (GET_ESID(addr1) == GET_ESID(addr2)); - /* Top of stack, grows down. */ - if (!is_kernel_addr(sp)) { - if (preload_add(ti, sp)) - slb_allocate_user(mm, sp); - } + /* One address < 1T, the other > 1T. Not a match */ + if (esid_1t_count == 1) + return 0; - /* Bottom of heap, grows up. */ - if (heap && !is_kernel_addr(heap)) { - if (preload_add(ti, heap)) - slb_allocate_user(mm, heap); - } + /* Both addresses are > 1T. */ + return (GET_ESID_1T(addr1) == GET_ESID_1T(addr2)); } - /* Flush all user entries from the segment table of the current processor. */ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) { - struct thread_info *ti = task_thread_info(tsk); - u8 i; + unsigned long pc = KSTK_EIP(tsk); + unsigned long stack = KSTK_ESP(tsk); + unsigned long exec_base; /* * We need interrupts hard-disabled here, not just soft-disabled, @@ -392,6 +304,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) && offset <= SLB_CACHE_ENTRIES) { unsigned long slbie_data = 0; + int i; asm volatile("isync" : : : "memory"); for (i = 0; i < offset; i++) { @@ -422,60 +335,67 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) "isync" :: "r"(ksp_vsid_data), "r"(ksp_esid_data)); - - get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1; } get_paca()->slb_cache_ptr = 0; } - get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap; + + copy_mm_to_paca(mm); /* - * We gradually age out SLBs after a number of context switches to - * reduce reload overhead of unused entries (like we do with FP/VEC - * reload). Each time we wrap 256 switches, take an entry out of the - * SLB preload cache. + * preload some userspace segments into the SLB. + * Almost all 32 and 64bit PowerPC executables are linked at + * 0x10000000 so it makes sense to preload this segment. */ - tsk->thread.load_slb++; - if (!tsk->thread.load_slb) { - unsigned long pc = KSTK_EIP(tsk); + exec_base = 0x10000000; - preload_age(ti); - preload_add(ti, pc); - } + if (is_kernel_addr(pc) || is_kernel_addr(stack) || + is_kernel_addr(exec_base)) + return; - for (i = 0; i < ti->slb_preload_nr; i++) { - unsigned long ea; - u8 idx; + slb_allocate(pc); - idx = (ti->slb_preload_tail + i) % SLB_PRELOAD_NR; - ea = (unsigned long)ti->slb_preload_esid[idx] << SID_SHIFT; + if (!esids_match(pc, stack)) + slb_allocate(stack); - slb_allocate_user(mm, ea); - } + if (!esids_match(pc, exec_base) && + !esids_match(stack, exec_base)) + slb_allocate(exec_base); } -void slb_set_size(u16 size) +static inline void patch_slb_encoding(unsigned int *insn_addr, + unsigned int immed) { - mmu_slb_size = size; + + /* + * This function patches either an li or a cmpldi instruction with + * a new immediate value. This relies on the fact that both li + * (which is actually addi) and cmpldi both take a 16-bit immediate + * value, and it is situated in the same location in the instruction, + * ie. bits 16-31 (Big endian bit order) or the lower 16 bits. + * The signedness of the immediate operand differs between the two + * instructions however this code is only ever patching a small value, + * much less than 1 << 15, so we can get away with it. + * To patch the value we read the existing instruction, clear the + * immediate value, and or in our new value, then write the instruction + * back. + */ + unsigned int insn = (*insn_addr & 0xffff0000) | immed; + patch_instruction(insn_addr, insn); } -static void cpu_flush_slb(void *parm) -{ - struct mm_struct *mm = parm; - unsigned long flags; +extern u32 slb_miss_kernel_load_linear[]; +extern u32 slb_miss_kernel_load_io[]; +extern u32 slb_compare_rr_to_size[]; +extern u32 slb_miss_kernel_load_vmemmap[]; - if (mm != current->active_mm) +void slb_set_size(u16 size) +{ + if (mmu_slb_size == size) return; - local_irq_save(flags); - slb_flush_and_rebolt(); - local_irq_restore(flags); -} - -void core_flush_all_slbs(struct mm_struct *mm) -{ - on_each_cpu(cpu_flush_slb, mm, 1); + mmu_slb_size = size; + patch_slb_encoding(slb_compare_rr_to_size, mmu_slb_size); } void slb_initialize(void) @@ -497,16 +417,24 @@ void slb_initialize(void) #endif if (!slb_encoding_inited) { slb_encoding_inited = 1; + patch_slb_encoding(slb_miss_kernel_load_linear, + SLB_VSID_KERNEL | linear_llp); + patch_slb_encoding(slb_miss_kernel_load_io, + SLB_VSID_KERNEL | io_llp); + patch_slb_encoding(slb_compare_rr_to_size, + mmu_slb_size); + pr_devel("SLB: linear LLP = %04lx\n", linear_llp); pr_devel("SLB: io LLP = %04lx\n", io_llp); + #ifdef CONFIG_SPARSEMEM_VMEMMAP + patch_slb_encoding(slb_miss_kernel_load_vmemmap, + SLB_VSID_KERNEL | vmemmap_llp); pr_devel("SLB: vmemmap LLP = %04lx\n", vmemmap_llp); #endif } get_paca()->stab_rr = SLB_NUM_BOLTED - 1; - get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1; - get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap; lflags = SLB_VSID_KERNEL | linear_llp; @@ -530,13 +458,52 @@ void slb_initialize(void) asm volatile("isync":::"memory"); } -static void slb_cache_update(unsigned long esid_data) +static void insert_slb_entry(unsigned long vsid, unsigned long ea, + int bpsize, int ssize) { + unsigned long flags, vsid_data, esid_data; + enum slb_index index; int slb_cache_index; if (cpu_has_feature(CPU_FTR_ARCH_300)) return; /* ISAv3.0B and later does not use slb_cache */ + /* + * We are irq disabled, hence should be safe to access PACA. + */ + VM_WARN_ON(!irqs_disabled()); + + /* + * We can't take a PMU exception in the following code, so hard + * disable interrupts. + */ + hard_irq_disable(); + + index = get_paca()->stab_rr; + + /* + * simple round-robin replacement of slb starting at SLB_NUM_BOLTED. + */ + if (index < (mmu_slb_size - 1)) + index++; + else + index = SLB_NUM_BOLTED; + + get_paca()->stab_rr = index; + + flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp; + vsid_data = (vsid << slb_vsid_shift(ssize)) | flags | + ((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT); + esid_data = mk_esid_data(ea, ssize, index); + + /* + * No need for an isync before or after this slbmte. The exception + * we enter with and the rfid we exit with are context synchronizing. + * Also we only handle user segments here. + */ + asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data) + : "memory"); + /* * Now update slb cache entries */ @@ -558,196 +525,58 @@ static void slb_cache_update(unsigned long esid_data) } } -static enum slb_index alloc_slb_index(bool kernel) +static void handle_multi_context_slb_miss(int context_id, unsigned long ea) { - enum slb_index index; - - /* - * The allocation bitmaps can become out of synch with the SLB - * when the _switch code does slbie when bolting a new stack - * segment and it must not be anywhere else in the SLB. This leaves - * a kernel allocated entry that is unused in the SLB. With very - * large systems or small segment sizes, the bitmaps could slowly - * fill with these entries. They will eventually be cleared out - * by the round robin allocator in that case, so it's probably not - * worth accounting for. - */ + struct mm_struct *mm = current->mm; + unsigned long vsid; + int bpsize; /* - * SLBs beyond 32 entries are allocated with stab_rr only - * POWER7/8/9 have 32 SLB entries, this could be expanded if a - * future CPU has more. + * We are always above 1TB, hence use high user segment size. */ - if (get_paca()->slb_used_bitmap != U32_MAX) { - index = ffz(get_paca()->slb_used_bitmap); - get_paca()->slb_used_bitmap |= 1U << index; - if (kernel) - get_paca()->slb_kern_bitmap |= 1U << index; - } else { - /* round-robin replacement of slb starting at SLB_NUM_BOLTED. */ - index = get_paca()->stab_rr; - if (index < (mmu_slb_size - 1)) - index++; - else - index = SLB_NUM_BOLTED; - get_paca()->stab_rr = index; - if (index < 32) { - if (kernel) - get_paca()->slb_kern_bitmap |= 1U << index; - else - get_paca()->slb_kern_bitmap &= ~(1U << index); - } - } - BUG_ON(index < SLB_NUM_BOLTED); - - return index; + vsid = get_vsid(context_id, ea, mmu_highuser_ssize); + bpsize = get_slice_psize(mm, ea); + insert_slb_entry(vsid, ea, bpsize, mmu_highuser_ssize); } -static long slb_insert_entry(unsigned long ea, unsigned long context, - unsigned long flags, int ssize, bool kernel) +void slb_miss_large_addr(struct pt_regs *regs) { - unsigned long vsid; - unsigned long vsid_data, esid_data; - enum slb_index index; - - vsid = get_vsid(context, ea, ssize); - if (!vsid) - return -EFAULT; + enum ctx_state prev_state = exception_enter(); + unsigned long ea = regs->dar; + int context; - index = alloc_slb_index(kernel); - - vsid_data = __mk_vsid_data(vsid, ssize, flags); - esid_data = mk_esid_data(ea, ssize, index); + if (REGION_ID(ea) != USER_REGION_ID) + goto slb_bad_addr; /* - * No need for an isync before or after this slbmte. The exception - * we enter with and the rfid we exit with are context synchronizing. - * Also we only handle user segments here. + * Are we beyound what the page table layout supports ? */ - asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data)); + if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE) + goto slb_bad_addr; - if (!kernel) - slb_cache_update(esid_data); - - return 0; -} - -static long slb_allocate_kernel(unsigned long ea, unsigned long id) -{ - unsigned long context; - unsigned long flags; - int ssize; - - if ((ea & ~REGION_MASK) >= (1ULL << MAX_EA_BITS_PER_CONTEXT)) - return -EFAULT; - - if (id == KERNEL_REGION_ID) { - flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp; -#ifdef CONFIG_SPARSEMEM_VMEMMAP - } else if (id == VMEMMAP_REGION_ID) { - flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmemmap_psize].sllp; -#endif - } else if (id == VMALLOC_REGION_ID) { - if (ea < H_VMALLOC_END) - flags = get_paca()->vmalloc_sllp; - else - flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp; - } else { - return -EFAULT; - } - - ssize = MMU_SEGSIZE_1T; - if (!mmu_has_feature(MMU_FTR_1T_SEGMENT)) - ssize = MMU_SEGSIZE_256M; - - context = id - KERNEL_REGION_CONTEXT_OFFSET; - - return slb_insert_entry(ea, context, flags, ssize, true); -} - -static long slb_allocate_user(struct mm_struct *mm, unsigned long ea) -{ - unsigned long context; - unsigned long flags; - int bpsize; - int ssize; + /* Lower address should have been handled by asm code */ + if (ea < (1UL << MAX_EA_BITS_PER_CONTEXT)) + goto slb_bad_addr; /* * consider this as bad access if we take a SLB miss * on an address above addr limit. */ - if (ea >= mm->context.slb_addr_limit) - return -EFAULT; + if (ea >= current->mm->context.slb_addr_limit) + goto slb_bad_addr; - context = get_ea_context(&mm->context, ea); + context = get_ea_context(¤t->mm->context, ea); if (!context) - return -EFAULT; - - if (unlikely(ea >= H_PGTABLE_RANGE)) { - WARN_ON(1); - return -EFAULT; - } - - ssize = user_segment_size(ea); - - bpsize = get_slice_psize(mm, ea); - flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp; - - return slb_insert_entry(ea, context, flags, ssize, false); -} - -long do_slb_fault(struct pt_regs *regs, unsigned long ea) -{ - unsigned long id = REGION_ID(ea); - - /* IRQs are not reconciled here, so can't check irqs_disabled */ - VM_WARN_ON(mfmsr() & MSR_EE); - - if (unlikely(!(regs->msr & MSR_RI))) - return -EINVAL; - - /* - * SLB kernel faults must be very careful not to touch anything - * that is not bolted. E.g., PACA and global variables are okay, - * mm->context stuff is not. - * - * SLB user faults can access all of kernel memory, but must be - * careful not to touch things like IRQ state because it is not - * "reconciled" here. The difficulty is that we must use - * fast_exception_return to return from kernel SLB faults without - * looking at possible non-bolted memory. We could test user vs - * kernel faults in the interrupt handler asm and do a full fault, - * reconcile, ret_from_except for user faults which would make them - * first class kernel code. But for performance it's probably nicer - * if they go via fast_exception_return too. - */ - if (id >= KERNEL_REGION_ID) { - return slb_allocate_kernel(ea, id); - } else { - struct mm_struct *mm = current->mm; - long err; - - if (unlikely(!mm)) - return -EFAULT; + goto slb_bad_addr; - err = slb_allocate_user(mm, ea); - if (!err) - preload_add(current_thread_info(), ea); - - return err; - } -} + handle_multi_context_slb_miss(context, ea); + exception_exit(prev_state); + return; -void do_bad_slb_fault(struct pt_regs *regs, unsigned long ea, long err) -{ - if (err == -EFAULT) { - if (user_mode(regs)) - _exception(SIGSEGV, regs, SEGV_BNDERR, ea); - else - bad_page_fault(regs, ea, SIGSEGV); - } else if (err == -EINVAL) { - unrecoverable_exception(regs); - } else { - BUG(); - } +slb_bad_addr: + if (user_mode(regs)) + _exception(SIGSEGV, regs, SEGV_BNDERR, ea); + else + bad_page_fault(regs, ea, SIGSEGV); + exception_exit(prev_state); } diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S new file mode 100644 index 000000000000..4d2e921d696e --- /dev/null +++ b/arch/powerpc/mm/slb_low.S @@ -0,0 +1,335 @@ +/* + * Low-level SLB routines + * + * Copyright (C) 2004 David Gibson , IBM + * + * Based on earlier C version: + * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com + * Copyright (c) 2001 Dave Engebretsen + * Copyright (C) 2002 Anton Blanchard , IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * This macro generates asm code to compute the VSID scramble + * function. Used in slb_allocate() and do_stab_bolted. The function + * computed is: (protovsid*VSID_MULTIPLIER) % VSID_MODULUS + * + * rt = register containing the proto-VSID and into which the + * VSID will be stored + * rx = scratch register (clobbered) + * rf = flags + * + * - rt and rx must be different registers + * - The answer will end up in the low VSID_BITS bits of rt. The higher + * bits may contain other garbage, so you may need to mask the + * result. + */ +#define ASM_VSID_SCRAMBLE(rt, rx, rf, size) \ + lis rx,VSID_MULTIPLIER_##size@h; \ + ori rx,rx,VSID_MULTIPLIER_##size@l; \ + mulld rt,rt,rx; /* rt = rt * MULTIPLIER */ \ +/* \ + * powermac get slb fault before feature fixup, so make 65 bit part \ + * the default part of feature fixup \ + */ \ +BEGIN_MMU_FTR_SECTION \ + srdi rx,rt,VSID_BITS_65_##size; \ + clrldi rt,rt,(64-VSID_BITS_65_##size); \ + add rt,rt,rx; \ + addi rx,rt,1; \ + srdi rx,rx,VSID_BITS_65_##size; \ + add rt,rt,rx; \ + rldimi rf,rt,SLB_VSID_SHIFT_##size,(64 - (SLB_VSID_SHIFT_##size + VSID_BITS_65_##size)); \ +MMU_FTR_SECTION_ELSE \ + srdi rx,rt,VSID_BITS_##size; \ + clrldi rt,rt,(64-VSID_BITS_##size); \ + add rt,rt,rx; /* add high and low bits */ \ + addi rx,rt,1; \ + srdi rx,rx,VSID_BITS_##size; /* extract 2^VSID_BITS bit */ \ + add rt,rt,rx; \ + rldimi rf,rt,SLB_VSID_SHIFT_##size,(64 - (SLB_VSID_SHIFT_##size + VSID_BITS_##size)); \ +ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_68_BIT_VA) + + +/* void slb_allocate(unsigned long ea); + * + * Create an SLB entry for the given EA (user or kernel). + * r3 = faulting address, r13 = PACA + * r9, r10, r11 are clobbered by this function + * r3 is preserved. + * No other registers are examined or changed. + */ +_GLOBAL(slb_allocate) + /* + * Check if the address falls within the range of the first context, or + * if we may need to handle multi context. For the first context we + * allocate the slb entry via the fast path below. For large address we + * branch out to C-code and see if additional contexts have been + * allocated. + * The test here is: + * (ea & ~REGION_MASK) >= (1ull << MAX_EA_BITS_PER_CONTEXT) + */ + rldicr. r9,r3,4,(63 - MAX_EA_BITS_PER_CONTEXT - 4) + bne- 8f + + srdi r9,r3,60 /* get region */ + srdi r10,r3,SID_SHIFT /* get esid */ + cmpldi cr7,r9,0xc /* cmp PAGE_OFFSET for later use */ + + /* r3 = address, r10 = esid, cr7 = <> PAGE_OFFSET */ + blt cr7,0f /* user or kernel? */ + + /* Check if hitting the linear mapping or some other kernel space + */ + bne cr7,1f + + /* Linear mapping encoding bits, the "li" instruction below will + * be patched by the kernel at boot + */ +.globl slb_miss_kernel_load_linear +slb_miss_kernel_load_linear: + li r11,0 + /* + * context = (ea >> 60) - (0xc - 1) + * r9 = region id. + */ + subi r9,r9,KERNEL_REGION_CONTEXT_OFFSET + +BEGIN_FTR_SECTION + b .Lslb_finish_load +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) + b .Lslb_finish_load_1T + +1: +#ifdef CONFIG_SPARSEMEM_VMEMMAP + cmpldi cr0,r9,0xf + bne 1f +/* Check virtual memmap region. To be patched at kernel boot */ +.globl slb_miss_kernel_load_vmemmap +slb_miss_kernel_load_vmemmap: + li r11,0 + b 6f +1: +#endif /* CONFIG_SPARSEMEM_VMEMMAP */ + + /* + * r10 contains the ESID, which is the original faulting EA shifted + * right by 28 bits. We need to compare that with (H_VMALLOC_END >> 28) + * which is 0xd00038000. That can't be used as an immediate, even if we + * ignored the 0xd, so we have to load it into a register, and we only + * have one register free. So we must load all of (H_VMALLOC_END >> 28) + * into a register and compare ESID against that. + */ + lis r11,(H_VMALLOC_END >> 32)@h // r11 = 0xffffffffd0000000 + ori r11,r11,(H_VMALLOC_END >> 32)@l // r11 = 0xffffffffd0003800 + // Rotate left 4, then mask with 0xffffffff0 + rldic r11,r11,4,28 // r11 = 0xd00038000 + cmpld r10,r11 // if r10 >= r11 + bge 5f // goto io_mapping + + /* + * vmalloc mapping gets the encoding from the PACA as the mapping + * can be demoted from 64K -> 4K dynamically on some machines. + */ + lhz r11,PACAVMALLOCSLLP(r13) + b 6f +5: + /* IO mapping */ +.globl slb_miss_kernel_load_io +slb_miss_kernel_load_io: + li r11,0 +6: + /* + * context = (ea >> 60) - (0xc - 1) + * r9 = region id. + */ + subi r9,r9,KERNEL_REGION_CONTEXT_OFFSET + +BEGIN_FTR_SECTION + b .Lslb_finish_load +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) + b .Lslb_finish_load_1T + +0: /* + * For userspace addresses, make sure this is region 0. + */ + cmpdi r9, 0 + bne- 8f + /* + * user space make sure we are within the allowed limit + */ + ld r11,PACA_SLB_ADDR_LIMIT(r13) + cmpld r3,r11 + bge- 8f + + /* when using slices, we extract the psize off the slice bitmaps + * and then we need to get the sllp encoding off the mmu_psize_defs + * array. + * + * XXX This is a bit inefficient especially for the normal case, + * so we should try to implement a fast path for the standard page + * size using the old sllp value so we avoid the array. We cannot + * really do dynamic patching unfortunately as processes might flip + * between 4k and 64k standard page size + */ +#ifdef CONFIG_PPC_MM_SLICES + /* r10 have esid */ + cmpldi r10,16 + /* below SLICE_LOW_TOP */ + blt 5f + /* + * Handle hpsizes, + * r9 is get_paca()->context.high_slices_psize[index], r11 is mask_index + */ + srdi r11,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT + 1) /* index */ + addi r9,r11,PACAHIGHSLICEPSIZE + lbzx r9,r13,r9 /* r9 is hpsizes[r11] */ + /* r11 = (r10 >> (SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT)) & 0x1 */ + rldicl r11,r10,(64 - (SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT)),63 + b 6f + +5: + /* + * Handle lpsizes + * r9 is get_paca()->context.low_slices_psize[index], r11 is mask_index + */ + srdi r11,r10,1 /* index */ + addi r9,r11,PACALOWSLICESPSIZE + lbzx r9,r13,r9 /* r9 is lpsizes[r11] */ + rldicl r11,r10,0,63 /* r11 = r10 & 0x1 */ +6: + sldi r11,r11,2 /* index * 4 */ + /* Extract the psize and multiply to get an array offset */ + srd r9,r9,r11 + andi. r9,r9,0xf + mulli r9,r9,MMUPSIZEDEFSIZE + + /* Now get to the array and obtain the sllp + */ + ld r11,PACATOC(r13) + ld r11,mmu_psize_defs@got(r11) + add r11,r11,r9 + ld r11,MMUPSIZESLLP(r11) + ori r11,r11,SLB_VSID_USER +#else + /* paca context sllp already contains the SLB_VSID_USER bits */ + lhz r11,PACACONTEXTSLLP(r13) +#endif /* CONFIG_PPC_MM_SLICES */ + + ld r9,PACACONTEXTID(r13) +BEGIN_FTR_SECTION + cmpldi r10,0x1000 + bge .Lslb_finish_load_1T +END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) + b .Lslb_finish_load + +8: /* invalid EA - return an error indication */ + crset 4*cr0+eq /* indicate failure */ + blr + +/* + * Finish loading of an SLB entry and return + * + * r3 = EA, r9 = context, r10 = ESID, r11 = flags, clobbers r9, cr7 = <> PAGE_OFFSET + */ +.Lslb_finish_load: + rldimi r10,r9,ESID_BITS,0 + ASM_VSID_SCRAMBLE(r10,r9,r11,256M) + /* r3 = EA, r11 = VSID data */ + /* + * Find a slot, round robin. Previously we tried to find a + * free slot first but that took too long. Unfortunately we + * dont have any LRU information to help us choose a slot. + */ + + mr r9,r3 + + /* slb_finish_load_1T continues here. r9=EA with non-ESID bits clear */ +7: ld r10,PACASTABRR(r13) + addi r10,r10,1 + /* This gets soft patched on boot. */ +.globl slb_compare_rr_to_size +slb_compare_rr_to_size: + cmpldi r10,0 + + blt+ 4f + li r10,SLB_NUM_BOLTED + +4: + std r10,PACASTABRR(r13) + +3: + rldimi r9,r10,0,36 /* r9 = EA[0:35] | entry */ + oris r10,r9,SLB_ESID_V@h /* r10 = r9 | SLB_ESID_V */ + + /* r9 = ESID data, r11 = VSID data */ + + /* + * No need for an isync before or after this slbmte. The exception + * we enter with and the rfid we exit with are context synchronizing. + */ + slbmte r11,r10 + + /* we're done for kernel addresses */ + crclr 4*cr0+eq /* set result to "success" */ + bgelr cr7 + + /* Update the slb cache */ + lhz r9,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */ + cmpldi r9,SLB_CACHE_ENTRIES + bge 1f + + /* still room in the slb cache */ + sldi r11,r9,2 /* r11 = offset * sizeof(u32) */ + srdi r10,r10,28 /* get the 36 bits of the ESID */ + add r11,r11,r13 /* r11 = (u32 *)paca + offset */ + stw r10,PACASLBCACHE(r11) /* paca->slb_cache[offset] = esid */ + addi r9,r9,1 /* offset++ */ + b 2f +1: /* offset >= SLB_CACHE_ENTRIES */ + li r9,SLB_CACHE_ENTRIES+1 +2: + sth r9,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */ + crclr 4*cr0+eq /* set result to "success" */ + blr + +/* + * Finish loading of a 1T SLB entry (for the kernel linear mapping) and return. + * + * r3 = EA, r9 = context, r10 = ESID(256MB), r11 = flags, clobbers r9 + */ +.Lslb_finish_load_1T: + srdi r10,r10,(SID_SHIFT_1T - SID_SHIFT) /* get 1T ESID */ + rldimi r10,r9,ESID_BITS_1T,0 + ASM_VSID_SCRAMBLE(r10,r9,r11,1T) + + li r10,MMU_SEGSIZE_1T + rldimi r11,r10,SLB_VSID_SSIZE_SHIFT,0 /* insert segment size */ + + /* r3 = EA, r11 = VSID data */ + clrrdi r9,r3,SID_SHIFT_1T /* clear out non-ESID bits */ + b 7b + + +_ASM_NOKPROBE_SYMBOL(slb_allocate) +_ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_linear) +_ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_io) +_ASM_NOKPROBE_SYMBOL(slb_compare_rr_to_size) +#ifdef CONFIG_SPARSEMEM_VMEMMAP +_ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_vmemmap) +#endif diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index fc5b3a1ec666..205fe557ca10 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -207,6 +207,23 @@ static bool slice_check_range_fits(struct mm_struct *mm, return true; } +static void slice_flush_segments(void *parm) +{ +#ifdef CONFIG_PPC64 + struct mm_struct *mm = parm; + unsigned long flags; + + if (mm != current->active_mm) + return; + + copy_mm_to_paca(current->active_mm); + + local_irq_save(flags); + slb_flush_and_rebolt(); + local_irq_restore(flags); +#endif +} + static void slice_convert(struct mm_struct *mm, const struct slice_mask *mask, int psize) { @@ -272,9 +289,6 @@ static void slice_convert(struct mm_struct *mm, spin_unlock_irqrestore(&slice_convert_lock, flags); copro_flush_all_slbs(mm); -#ifdef CONFIG_PPC64 - core_flush_all_slbs(mm); -#endif } /* @@ -488,9 +502,8 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, * be already initialised beyond the old address limit. */ mm->context.slb_addr_limit = high_limit; -#ifdef CONFIG_PPC64 - core_flush_all_slbs(mm); -#endif + + on_each_cpu(slice_flush_segments, mm, 1); } /* Sanity checks */ @@ -652,10 +665,8 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, (SLICE_NUM_HIGH && !bitmap_empty(potential_mask.high_slices, SLICE_NUM_HIGH))) { slice_convert(mm, &potential_mask, psize); -#ifdef CONFIG_PPC64 if (psize > MMU_PAGE_BASE) - core_flush_all_slbs(mm); -#endif + on_each_cpu(slice_flush_segments, mm, 1); } return newaddr; @@ -746,20 +757,6 @@ void slice_init_new_context_exec(struct mm_struct *mm) bitmap_fill(mask->high_slices, SLICE_NUM_HIGH); } -#ifdef CONFIG_PPC_BOOK3S_64 -void slice_setup_new_exec(void) -{ - struct mm_struct *mm = current->mm; - - slice_dbg("slice_setup_new_exec(mm=%p)\n", mm); - - if (!is_32bit_task()) - return; - - mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW; -} -#endif - void slice_set_range_psize(struct mm_struct *mm, unsigned long start, unsigned long len, unsigned int psize) { diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 694c1d92e796..c70d17c9a6ba 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -2394,9 +2394,7 @@ static void dump_one_paca(int cpu) } } DUMP(p, vmalloc_sllp, "%#-*x"); - DUMP(p, stab_rr, "%#-*x"); - DUMP(p, slb_used_bitmap, "%#-*x"); - DUMP(p, slb_kern_bitmap, "%#-*x"); + DUMP(p, stab_rr, "%#-*llx"); if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) { DUMP(p, slb_cache_ptr, "%#-*x"); -- 2.30.2